Add gaussian 94 support

2025-01-03 01:55:54 +01:00 · 2015-03-17 15:03:38 +01:00 · 2015-03-17 15:03:38 +01:00 · 0af5cb6fa1
commit 0af5cb6fa1
parent 68d5349857
2 changed files with 89 additions and 5 deletions
--- a/src/EMSL_local.py
+++ b/src/EMSL_local.py
@ -229,13 +229,13 @@ class EMSL_local:

        str_ = """SELECT DISTINCT elt
                  FROM output_tab
-                  WHERE name=:name_us COLLATE NOCASE"""
+                  WHERE name=(?) COLLATE NOCASE"""

        # ~#~#~#~#~ #
        # F e t c h #
        # ~#~#~#~#~ #

-        self.c.execute(str_, {"name_us": basis_name})
+        self.c.execute(str_, basis_name)

        # ~#~#~#~#~#~ #
        # R e t u r n #
--- a/src/parser.py
+++ b/src/parser.py
@ -1,5 +1,6 @@
 import sys
 import os
+import re


 def get_dict_ele():
@ -74,8 +75,6 @@ def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
    return [name, des, basis_data]


-import re
-
 symmetry_regex = re.compile(ur'^(\w)\s+\d+\b')


@ -158,6 +157,91 @@ def handle_l_gamess_us(l_atom_basis):

    return l_data

+
+#  __                            _
+# /__  _.      _  _ o  _. ._    (_| |_|_
+# \_| (_| |_| _> _> | (_| | |     |   |
+#
+def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
+    """Split raw Gaussian94 basis set text into per-element blocks.
+
+    The text is expected to hold one block per element, each block being
+    terminated by a line of ``****`` (the first block is also preceded by
+    one).  Everything before the first marker and after the last marker is
+    discarded.
+
+    Each entry of the returned data-pairs list is a 2 item list:
+    [symbol, data]
+
+    e.g. ["Ca", "Ca     0\\nS   8   1.00\\n..."]
+
+    N.B.: Currently ignores ECP data!
+
+    @param data: raw text from BSE
+    @type data : unicode
+    @param name: basis set name
+    @type name : str
+    @param description: basis set description
+    @type description : str
+    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
+    @type elements : list
+    @param debug: if true, print the raw data before raising and write a
+                  warning to stderr for every block whose symbol is not
+                  (or no longer) in the expected elements
+    @type debug : bool
+    @return: [name, description, data-pairs]
+    @rtype : list
+    @raise ValueError: if data contains no ``****`` marker at all
+    """
+
+    # Each basis set block starts and ends with ****. Find the region
+    # containing all the basis blocks using the first and last ****.
+    mark = "****"
+    begin = data.find(mark)
+    end = data.rfind(mark)
+
+    if begin == -1 or end == -1:
+        if debug:
+            print(data)
+        str_ = " No basis set data found while attempting to process {0} ({1})"
+        raise ValueError(str_.format(name, description))
+
+    # Keep what lies strictly between the first and the last marker.
+    # `end` already points at the start of the last marker, so the slice
+    # must stop there; stopping any earlier truncates the final block.
+    trimmed = data[begin + len(mark):end].strip()
+
+    # Group the lines into per-element chunks.  A line starting with the
+    # marker closes the current chunk; any line containing the marker is
+    # dropped from the chunk text itself.
+    chunks = []
+    lines = []
+    for line in trimmed.split("\n"):
+        if line.startswith(mark):
+            chunks.append(lines)
+            lines = []
+        elif mark not in line:
+            lines.append(line)
+    # the trailing chunk is not followed by another marker line
+    chunks.append(lines)
+
+    # check each block for element and assign symbols to final pairs
+    pairs = []
+    unused_elements = set(e.upper() for e in elements)
+    for chunk_lines in chunks:
+        # join lines back into a solid text block
+        chunk = "\n".join(chunk_lines)
+
+        # The element symbol is the first whitespace-delimited token of
+        # the block.  Blocks holding nothing but whitespace (e.g. two
+        # consecutive markers) carry no data and are skipped.
+        tokens = chunk.split(None, 1)
+        if not tokens:
+            continue
+        symbol = tokens[0]
+
+        try:
+            unused_elements.remove(symbol.upper())
+        except KeyError:
+            # symbol was either seen before or never requested
+            if debug:
+                msg = "Warning: already processed {0}\n".format(symbol)
+                sys.stderr.write(msg)
+        pairs.append([symbol, chunk])
+
+    if unused_elements:
+        msg = "Warning: elements {0} left over for {1}".format(
+            list(unused_elements),
+            name)
+        print(msg)
+
+    return [name, description, pairs]
+
+
 # ______                         _         _ _      _
 # |  ___|                       | |       | (_)    | |
 # | |_ _ __ ___  _ __ ___   __ _| |_    __| |_  ___| |_
@ -166,7 +250,7 @@ def handle_l_gamess_us(l_atom_basis):
 # \_| |_|  \___/|_| |_| |_|\__,_|\__|  \__,_|_|\___|\__|
 #

-format_dict = {"Gaussian94": None,
+format_dict = {"Gaussian94": parse_basis_data_gaussian94,
               "GAMESS-US": parse_basis_data_gamess_us,
               "GAMESS-UK": None,
               "Turbomole": None,