Add gaussian 94 support

2025-01-05 02:48:45 +01:00 · 2015-03-17 15:03:38 +01:00 · 2015-03-17 15:03:38 +01:00 · 0af5cb6fa1
commit 0af5cb6fa1
parent 68d5349857
2 changed files with 89 additions and 5 deletions
--- a/src/EMSL_local.py
+++ b/src/EMSL_local.py
@ -229,13 +229,13 @@ class EMSL_local:
        str_ = """SELECT DISTINCT elt
                  FROM output_tab
-                  WHERE name=:name_us COLLATE NOCASE"""
+                  WHERE name=(?) COLLATE NOCASE"""
        # ~#~#~#~#~ #
        # F e t c h #
        # ~#~#~#~#~ #
-        self.c.execute(str_, {"name_us": basis_name})
+        self.c.execute(str_, basis_name)
        # ~#~#~#~#~#~ #
        # R e t u r n #
--- a/src/parser.py
+++ b/src/parser.py
@ -1,5 +1,6 @@
 import sys
 import os
 import re
 def get_dict_ele():
@ -74,8 +75,6 @@ def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
    return [name, des, basis_data]
 import re
 symmetry_regex = re.compile(ur'^(\w)\s+\d+\b')
@ -158,6 +157,91 @@ def handle_l_gamess_us(l_atom_basis):
    return l_data
 #  __                            _
 # /__  _.      _  _ o  _. ._    (_| |_|_
 # \_| (_| |_| _> _> | (_| | |     |   |
 #
 def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
     """Parse raw Gaussian94 basis set text into per-element data pairs.

     Every element block in the Gaussian94 format is delimited by a line
     starting with ``****``. The region between the first and the last
     marker is split into one chunk per element, and each chunk is paired
     with the symbol read from the first 3 characters of its first line.

     Each data-pair is a 2 item list:
     [symbol, data]
     e.g. ["Ca", "Ca     0\\nS   6   1.00\\n..."]

     N.B.: Currently ignores ECP data!

     @param data: raw text from BSE
     @type data : unicode
     @param name: basis set name
     @type name : str
     @param description: basis set description
     @type description : str
     @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
     @type elements : list
     @param debug: when true, dump the raw data before raising and write
                   per-element warnings to stderr
     @type debug : bool
     @return: [name, description, data-pairs]
     @rtype : list
     @raise ValueError: if data does not contain at least two markers,
                        i.e. there is no delimited basis block at all
     """
     mark = "****"
     begin = data.find(mark)
     end = data.rfind(mark)

     # find() and rfind() fail together; begin == end means a single marker,
     # which cannot delimit any block.
     if begin == -1 or begin == end:
         if debug:
             print(data)
         str_ = " No basis set data found while attempting to process {0} ({1})"
         raise ValueError(str_.format(name, description))

     # `end` is the index where the last marker *starts*, so slicing up to
     # `end` already excludes it. Subtracting len(mark) again would chop the
     # last characters of the final element's data.
     trimmed = data[begin + len(mark):end].strip()

     # Group the lines between markers into per-element chunks. Lines that
     # merely contain the marker are dropped, they never carry basis data.
     chunks = []
     lines = []
     for line in trimmed.split("\n"):
         if line.startswith(mark):
             chunks.append(lines)
             lines = []
         elif mark not in line:
             lines.append(line)
     chunks.append(lines)

     # Join lines back into solid text blocks; consecutive markers yield
     # blank blocks, which carry no element and are skipped.
     blocks = ["\n".join(c) for c in chunks]
     blocks = [b for b in blocks if b.strip()]

     # Check each block for its element and build the final pairs.
     pairs = []
     unused_elements = {e.upper() for e in elements}
     seen = set()
     for chunk in blocks:
         # get first 3 chars of first line in block
         symbol = chunk.split("\n")[0][:3].strip()
         key = symbol.upper()

         if key in unused_elements:
             unused_elements.remove(key)
         elif debug:
             if key in seen:
                 msg = "Warning: already processed {0}\n".format(symbol)
             else:
                 msg = "Warning: unexpected element {0}\n".format(symbol)
             sys.stderr.write(msg)
         seen.add(key)

         pairs.append([symbol, chunk])

     if unused_elements:
         msg = "Warning: elements {0} left over for {1}".format(
             list(unused_elements),
             name)
         print(msg)

     return [name, description, pairs]
 # ______                         _         _ _      _
 # |  ___|                       | |       | (_)    | |
 # | |_ _ __ ___  _ __ ___   __ _| |_    __| |_  ___| |_
@ -166,7 +250,7 @@ def handle_l_gamess_us(l_atom_basis):
 # \_| |_|  \___/|_| |_| |_|\__,_|\__|  \__,_|_|\___|\__|
 #
-format_dict = {"Gaussian94": None,
+format_dict = {"Gaussian94": parse_basis_data_gaussian94,
               "GAMESS-US": parse_basis_data_gamess_us,
               "GAMESS-UK": None,
               "Turbomole": None,