10
0
mirror of https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local synced 2025-01-03 01:55:54 +01:00

Add gaussian 94 support

This commit is contained in:
Thomas Applencourt 2015-03-17 15:03:38 +01:00
parent 68d5349857
commit 0af5cb6fa1
2 changed files with 89 additions and 5 deletions

View File

@ -229,13 +229,13 @@ class EMSL_local:
str_ = """SELECT DISTINCT elt
FROM output_tab
WHERE name=:name_us COLLATE NOCASE"""
WHERE name=(?) COLLATE NOCASE"""
# ~#~#~#~#~ #
# F e t c h #
# ~#~#~#~#~ #
self.c.execute(str_, {"name_us": basis_name})
self.c.execute(str_, basis_name)
# ~#~#~#~#~#~ #
# R e t u r n #

View File

@ -1,5 +1,6 @@
import sys
import os
import re
def get_dict_ele():
@ -74,8 +75,6 @@ def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
return [name, des, basis_data]
import re
symmetry_regex = re.compile(ur'^(\w)\s+\d+\b')
@ -158,6 +157,91 @@ def handle_l_gamess_us(l_atom_basis):
return l_data
# __ _
# /__ _. _ _ o _. ._ (_| |_|_
# \_| (_| |_| _> _> | (_| | | | |
#
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
    """Split raw Gaussian94 basis set text into per-element blocks.

    Every per-element block in the Gaussian94 output is terminated by a
    line starting with ``****``. The text between the first and the last
    marker is cut into blocks, and each block is paired with the element
    symbol found at the start of its first line.

    N.B.: Only the text between the first and last ``****`` is considered;
    anything after the last marker (e.g. ECP data) is ignored.

    @param data: raw text from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: when true, dump ``data`` to stdout before raising and
                  warn on stderr about symbols that were not expected
                  (or were seen twice)
    @type debug : bool
    @return: [name, description, pairs] where pairs is a list of
             [symbol, block_text] two-item lists,
             e.g. ["Ca", "Ca     0\\nS   8   1.00\\n..."]
    @rtype : list
    @raise ValueError: if ``data`` contains no ``****`` marker at all
    """
    mark = "****"
    begin = data.find(mark)
    end = data.rfind(mark)

    # rfind() fails exactly when find() does, so one check covers both.
    if begin == -1:
        if debug:
            print(data)
        str_ = " No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # `end` already points at the first character of the last marker, so
    # the region stops there. Subtracting len(mark) again would chop the
    # tail off the last line of the final block.
    region = data[begin + len(mark):end]

    # Group the lines into blocks. A line starting with the marker closes
    # the current block; any line containing the marker is dropped.
    blocks = []
    current = []
    for line in region.split("\n"):
        if line.startswith(mark):
            blocks.append(current)
            current = []
        elif mark not in line:
            current.append(line)
    blocks.append(current)

    # Join lines back into text blocks and drop the empty ones (e.g. two
    # consecutive markers), which carry no basis data.
    chunks = []
    for block in blocks:
        text = "\n".join(block).strip()
        if text:
            chunks.append(text)

    # Pair every block with its element symbol and keep track of which of
    # the requested elements have not been seen.
    pairs = []
    unused_elements = set(e.upper() for e in elements)
    for chunk in chunks:
        # The symbol is the first token of the block's header line
        # (e.g. "H     0"); taking the token rather than a fixed-width
        # slice also copes with a single-space header such as "C 0".
        symbol = chunk.split("\n")[0].split()[0]
        try:
            unused_elements.remove(symbol.upper())
        except KeyError:
            # Either a duplicate block or a symbol that was not requested.
            if debug:
                msg = "Warning: already processed {0}\n".format(symbol)
                sys.stderr.write(msg)
        pairs.append([symbol, chunk])

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}".format(
            list(unused_elements),
            name)
        print(msg)

    return [name, description, pairs]
# ______ _ _ _ _
# | ___| | | | (_) | |
# | |_ _ __ ___ _ __ ___ __ _| |_ __| |_ ___| |_
@ -166,7 +250,7 @@ def handle_l_gamess_us(l_atom_basis):
# \_| |_| \___/|_| |_| |_|\__,_|\__| \__,_|_|\___|\__|
#
format_dict = {"Gaussian94": None,
format_dict = {"Gaussian94": parse_basis_data_gaussian94,
"GAMESS-US": parse_basis_data_gamess_us,
"GAMESS-UK": None,
"Turbomole": None,