sdf2psi4.py -- a Python script that simplifies psi4 setup and post-processing

I wrote a python script that makes it easy to setup psi4 calculations and visualize results when using SD files as input. The script uses purely open source codes (RDKit, OpenBabel) to handle SD file manipulation. It eliminates the manual steps of copying coordinates when setting up simple calculations like geometry optimization. It also performs post-processing of Psi4 output to create a SD file containing the optimized coordinates and stores the energy as a SD tag.

I thought I would share this in case anyone find it useful. Your comments on how to improve it would be appreciated.

sdf2psi4.py code:

  #!/usr/bin/python
from __future__ import print_function
from __future__ import division
__author__ = 'JW Feng'
import sys
import os
import argparse
from string import Template
from rdkit import Chem

# TODO
# add feature to process mult record sd files
# sd file should have a tag that contains unique molecule names and those names will be used
# as prefix for psi4 filenames

# return atom elements and XYZ coordinates for a given SD file
# return only values for first molecule if there are multiple molecules
def get_xyz(infile):
    # RDKit suppresses hydrogen atoms by dfault, use removeHs=False to keep explicit hydrogen atoms
    suppl = Chem.SDMolSupplier(infile, removeHs=False)
    # test each molecule to see if it was correctly read before working with it
    mol = suppl.next()
    if mol is None:
        print ("skipping mol", file=sys.stderr)
        return None
    num_atoms = mol.GetNumAtoms()
    xyz_string=""
    for counter in range(num_atoms):
        pos=mol.GetConformer().GetAtomPosition(counter)
        xyz_string = xyz_string + ("%s %12.6f %12.6f %12.6f\n" % (mol.GetAtomWithIdx(counter).GetSymbol(), pos.x, pos.y, pos.z) )

    return xyz_string


def replace_coords_in_psi4_template(template_file, xyz_string, file_to_save, email):
    data = open(template_file, 'r').read()
    template = Template(data)
    new_data = template.substitute(COORDINATES=xyz_string, XYZ_FILE_TO_SAVE=file_to_save+".xyz", SD_FILE_TO_SAVE=file_to_save+".sdf", EMAIL=email)
    #print (new_data)
    return new_data
def main(argv=None):

    # usage statement
    program_description = """Prepare input files for Psi4"""
    epilog_text = """text appears after help statement, can include examples on how to run this program"""
    parser = argparse.ArgumentParser(description=program_description, epilog=epilog_text)

    # optional requirements, "required=True" makes it NOT optional
    parser.add_argument("-in", dest="infile", required=True, help="input file")
    parser.add_argument("-out", dest="outfile", help="output file, ignores prefix for output file if out is specified")
    parser.add_argument("-template", dest="template", required=True, help="Psi4 template file")
    parser.add_argument("-prefix", dest="prefix", help="prefix for psi4 input files, default=psi4_prefix_", default="psi4_prefix_")
    parser.add_argument("-email", dest="email", required=True, help="email address to sent results")
    #parser.add_argument("-torsion", dest="torsion", help="atom numbers specifying torsion to be scanned")
    args=None
    try:
        args = parser.parse_args(argv)
    except:
        # useful parser functions
        parser.print_help()
        sys.stderr.write("Input parameters were incorrect, please check help message\n")
        return 2

    if ( not args.infile.endswith(".sdf") ):
        sys.stderr.write("Input file must end with .sdf")
        return 2

    # get atom elements and  molecule coordinates in XYZ format
    xyz_string = get_xyz(args.infile)
    #print (xyz_string)

    # create result file name for storing psi4 output using basename of inputfile
    file_base = os.path.splitext(os.path.basename(args.infile))[0]
    result_file = args.prefix + file_base
    psi4_file = args.prefix + file_base + ".opt"
    if args.outfile is not None:
        psi4_file = args.outfile

    # insert XYZ and prefix into Psi4 template
    psi4_string = replace_coords_in_psi4_template(args.template, xyz_string, result_file, args.email)
    fh = open (psi4_file, 'w')
    fh.write(psi4_string)
    fh.close()

    sys.stderr.write("To start psi4 job, run: psi4 " +  psi4_file + " -n nCPUs\n")
    #print (psi4_string)

if __name__ == "__main__":
    # Let main()'s return value specify the exit status.
    sys.exit(main())

Template script

# optimize with HF/6-31g*
# calculate energy with MP2/g-311+g**

memory 8 GB   # use as much as you’ve got

# For Cartesian coordinates, use XYZ coordinates
# COORDINATES will be replaced with XYZ or ZMatric coordinates
molecule my_mol {
$COORDINATES}

# set smaller basis set for geometry HT geometry optimization
set {
    basis 6-31G*
    scf_type df
    guess sad
    ints_tolerance 1.0E-8
}

optimize('scf')   # geometry optimization with HF

# change basis set for single point energy calculation
set {
    basis 6-311+G**
    scf_type df
    guess sad
}
#DFT_energy = energy ('b3lyp') * psi_hartree2kcalmol # single point energy
MP2_energy = energy ('df-mp2') * psi_hartree2kcalmol  # calculate single point energy with MP2

# XYZ_FILE_TO_SAVE and SD_FILE_TO_SAVE will be replaced with filenames
my_mol.save_xyz_file("$XYZ_FILE_TO_SAVE", False)
command="obabel -ixyz $XYZ_FILE_TO_SAVE -osd $SD_FILE_TO_SAVE --property 'MP2_Energy(kcal/mol)' %.4f " % MP2_energy
os.system(command)
command='echo "Psi4 results for $SD_FILE_TO_SAVE " | mail -s "Psi4 results" -a $SD_FILE_TO_SAVE $EMAIL'
os.system(command)

sdf2psi.py and opt_HF_MP2energy.dat uploaded as attachments

sdf2psi4.py (3.7 KB) opt_HF_MP2energy.dat (1.2 KB)

Hello,

I am interested in using your script to convert a SD file to the format with which I can run a PSI4 calculation on it. I am not sure how to use it correctly. For example, when I attempted to run the command

python sdf2psi4.py mysdffile.sdf

I got the following error on my console:

python sdf2psi4.py testmol3D_min.sdf
Traceback (most recent call last):
File “sdf2psi4.py”, line 9, in
from rdkit import Chem
ImportError: No module named rdkit

any help in this conversion will be gratefully acknowledged.

Many thanks,
Shashi Rao

Hi,

This script requires RDKit. Installation instructions are available on http://rdkit.org/

JW