diff --git a/EMSL_api.py b/EMSL_api.py index 86e1370..623ecb1 100755 --- a/EMSL_api.py +++ b/EMSL_api.py @@ -5,14 +5,12 @@ Usage: EMSL_api.py list_basis [--basis=...] - --format= [--atom=...] [--db_path=] [--average_mo_number] EMSL_api.py list_atoms --basis= [--db_path=] EMSL_api.py get_basis_data --basis= - --format= [--atom=...] [--db_path=] [(--save [--path=])] @@ -77,7 +75,7 @@ if __name__ == '__main__': # \_____/_|___/\__| \____/ \__,_|___/_|___/ if arguments["list_basis"]: - e = EMSL_local(db_path=db_path, format=arguments["--format"]) + e = EMSL_local(db_path=db_path) elts = arguments["--atom"] @@ -100,7 +98,7 @@ if __name__ == '__main__': # | | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __| # | |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \ # \_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/ - if arguments["list_atoms"]: + elif arguments["list_atoms"]: e = EMSL_local(db_path=db_path) basis_name = arguments["--basis"] @@ -113,8 +111,8 @@ if __name__ == '__main__': # | ___ \/ _` / __| / __| / _` |/ _` | __/ _` | # | |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| | # \____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_| - if arguments["get_basis_data"]: - e = EMSL_local(db_path=db_path, format=arguments["--format"]) + elif arguments["get_basis_data"]: + e = EMSL_local(db_path=db_path) basis_name = arguments["--basis"][0] elts = arguments["--atom"] @@ -144,7 +142,7 @@ if __name__ == '__main__': # | | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __| # | |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \ # \_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/ - if arguments["list_formats"]: + elif arguments["list_formats"]: e = EMSL_dump() for i in e.get_list_format(): print i @@ -155,7 +153,7 @@ if __name__ == '__main__': # | | | '__/ _ \/ _` | __/ _ \ / _` | '_ \ # | \__/\ | | __/ (_| | || __/ | (_| | |_) | # \____/_| \___|\__,_|\__\___| \__,_|_.__/ - if arguments["create_db"]: + elif arguments["create_db"]: db_path = arguments["--db_path"] format = arguments["--format"] diff --git a/db/Gamess-us.db b/db/Gamess-us.db index cf70e88..50aa1e5 100644 Binary files a/db/Gamess-us.db and b/db/Gamess-us.db differ diff --git a/src/EMSL_dump.py b/src/EMSL_dump.py index 184b6d5..f26fa1b 100644 --- a/src/EMSL_dump.py +++ b/src/EMSL_dump.py @@ -189,6 +189,10 @@ class EMSL_dump: REFERENCES basis_tab(basis_id) );''') + c.execute('''CREATE TABLE format_tab(format TEXT)''') + c.execute('''INSERT INTO format_tab VALUES (?)''', [self.format]) + conn.commit() + c.execute(''' CREATE VIEW output_tab AS SELECT basis_id, name, @@ -229,8 +233,6 @@ class EMSL_dump: basis_data = self.parser(text, name, des, elts, self.debug) except: - if self.debug: - raise time.sleep(0.1) attemps += 1 else: diff --git a/src/EMSL_local.py b/src/EMSL_local.py index 806b5c3..75e7669 100755 --- a/src/EMSL_local.py +++ b/src/EMSL_local.py @@ -70,14 +70,14 @@ def cond_sql_or(table_name, l_value, glob=False): def string_to_nb_mo(str_type): """Take a string and return the nb of orbital""" - assert len(str_type) == 1 - d = {"S": 1, - "P": 2, - "D": 3} + d = {"S": 3, + "P": 5, + "D": 7, + "SP": 8} if str_type in d: - return 2 * d[str_type] + 1 + return d[str_type] # ord("F") = 70 and ord("Z") = 87 elif 70 <= ord(str_type) <= 87: # ord("F") = 70 and l = 4 so ofset if 66 @@ -127,36 +127,14 @@ class EMSL_local: All the method for using the EMSL db localy """ - def __init__(self, db_path=None, format=None): + def __init__(self, db_path=None): self.db_path = db_path - self.p = re.compile(ur'^(\w)\s+\d+\b') - self.format = format - def get_list_symetry(self, atom_basis): - """ - Return the begin and the end of all the type of orbital - input: atom_basis = [name, ] - output: [ [type, begin, end], ...] - """ - # Example - # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]" + self.conn = sqlite3.connect(self.db_path) + self.c = self.conn.cursor() - l = [] - for i, line in enumerate(atom_basis): - m = re.search(self.p, line) - if m: - l.append([m.group(1), i]) - try: - l[-2].append(i) - except IndexError: - pass - - l[-1].append(i + 1) - - print l - sys.exit() - - return l + self.c.execute("SELECT * from format_tab") + self.format = self.c.fetchone()[0] def get_list_basis_available(self, elts=[], @@ -173,12 +151,6 @@ class EMSL_local: # Else Get name,description # 3) Parse it - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() # ~#~#~#~#~#~ # # F i l t e r # @@ -213,9 +185,9 @@ class EMSL_local: WHERE elt=? AND {0}""".format(cmd_filter_basis) cmd = " INTERSECT ".join([str_] * len(elts)) + ";" - c.execute(cmd, elts) + self.c.execute(cmd, elts) - l_basis_id = [i[0] for i in c.fetchall()] + l_basis_id = [i[0] for i in self.c.fetchall()] # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ # # C r e a t e _ t h e _ c m d # @@ -240,10 +212,8 @@ class EMSL_local: # F e t c h # # ~#~#~#~#~ # - c.execute(cmd) - info = c.fetchall() - - conn.close() + self.c.execute(cmd) + info = self.c.fetchall() # ~#~#~#~#~#~#~ # # P a r s i n g # @@ -254,32 +224,25 @@ class EMSL_local: dict_info = OrderedDict() # Description : dict_info[name] = [description, nb_mo, nb_ele] + from src.parser import symmetry_dict if average_mo_number: - from src.parser import handle_f_dict - try: - f = handle_f_dict[self.format] + l_symmetry = symmetry_dict[self.format] except KeyError: - str_ = " WARNING Cannot handle counting L function in this format" - print >> sys.stderr, str_ + print >> sys.stderr, "You need to add a function in symmetry_dict" + print >> sys.stderr, "for your format ({0})".format(self.format) + sys.exit(1) for name, description, atom_basis in info: - try: - atom_basis = f([atom_basis], self.get_list_symetry) - atom_basis = "\n\n".join(atom_basis) - except UnboundLocalError: - pass - nb_mo = 0 line = atom_basis.split("\n") - for type_, _, _ in self.get_list_symetry(line): + for type_, _, _ in l_symmetry(line): nb_mo += string_to_nb_mo(type_) - try: dict_info[name][1] += nb_mo dict_info[name][2] += 1. @@ -297,13 +260,6 @@ class EMSL_local: def get_list_element_available(self, basis_name): - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - # ~#~#~#~#~#~ # # F i l t e r # # ~#~#~#~#~#~ # @@ -316,14 +272,13 @@ class EMSL_local: # F e t c h # # ~#~#~#~#~ # - c.execute(str_, {"name_us": basis_name}) - conn.close() + self.c.execute(str_, {"name_us": basis_name}) # ~#~#~#~#~#~ # # R e t u r n # # ~#~#~#~#~#~ # - return [str(i[0]) for i in c.fetchall()] + return [str(i[0]) for i in self.c.fetchall()] def get_basis(self, basis_name, elts=None, @@ -332,26 +287,18 @@ class EMSL_local: Return the data from the basis set """ - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - # ~#~#~#~#~#~ # # F i l t e r # # ~#~#~#~#~#~ # cmd_filter_ele = " ".join(cond_sql_or("elt", elts)) if elts else "(1)" - c.execute('''SELECT DISTINCT data from output_tab + self.c.execute('''SELECT DISTINCT data from output_tab WHERE name="{0}" AND {1}'''.format(basis_name, cmd_filter_ele)) # We need to take i[0] because fetchall return a tuple [(value),...] - l_atom_basis = [i[0].strip() for i in c.fetchall()] - conn.close() + l_atom_basis = [i[0].strip() for i in self.c.fetchall()] # ~#~#~#~#~#~#~#~ # # h a n d l e _ f # diff --git a/src/parser.py b/src/parser.py index 5fb968a..2320780 100644 --- a/src/parser.py +++ b/src/parser.py @@ -1,4 +1,5 @@ import sys +import os def get_dict_ele(): @@ -72,7 +73,98 @@ def parse_basis_data_gamess_us(data, name, des, elts, debug=False): return [name, des, basis_data] -import os + +import re + +symmetry_regex = re.compile(ur'^(\w)\s+\d+\b') + + +def l_symmetry_gamess_us(atom_basis): + """ + Return the begin and the end of all the type of orbital + input: atom_basis = [name, S 1, 12 0.12 12212, ...] + output: [ [type, begin, end], ...] + """ + # Example + # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]" + + l = [] + for i, line in enumerate(atom_basis): + m = re.search(symmetry_regex, line) + if m: + # Cause of L ! + read_symmetry = m.group(1) + + # L is real L or special SP + # Just check the number of exponant + if read_symmetry == "L" and len(atom_basis[i + 1].split()) == 4: + real_symmetry = "SP" + else: + real_symmetry = read_symmetry + + l.append([real_symmetry, i]) + try: + l[-2].append(i) + except IndexError: + pass + + l[-1].append(i + 1) + return l + + +def handle_f_gamess_us(l_atom_basis): + """ + Read l_atom_basis and change the SP in L and P + """ + + l_data = [] + for atom_basis in l_atom_basis: + + # Split the data in line + l_line_raw = atom_basis.split("\n") + l_line = [l_line_raw[0]] + # l_line_raw[0] containt the name of the Atom + + for symmetry, begin, end in l_symmetry_gamess_us(l_line_raw): + + if symmetry == "SP": + + body_s = [] + body_p = [] + + for i_l in l_line_raw[begin + 1:end]: + + # one L => S & P + a = i_l.split() + + common = "{:>3}".format(a[0]) + common += "{:>15.7f}".format(float(a[1])) + + tail_s = common + "{:>23.7f}".format(float(a[2])) + body_s.append(tail_s) + + tail_p = common + "{:>23.7f}".format(float(a[3])) + body_p.append(tail_p) + + l_line += [l_line_raw[begin].replace("L", "S")] + l_line += body_s + + l_line += [l_line_raw[begin].replace("L", "P")] + l_line += body_p + else: + l_line += l_line_raw[begin:end] + + l_data.append("\n".join(l_line)) + + return l_data + +# ______ _ _ _ _ +# | ___| | | | (_) | | +# | |_ _ __ ___ _ __ ___ __ _| |_ __| |_ ___| |_ +# | _| '__/ _ \| '_ ` _ \ / _` | __| / _` | |/ __| __| +# | | | | | (_) | | | | | | (_| | |_ | (_| | | (__| |_ +# \_| |_| \___/|_| |_| |_|\__,_|\__| \__,_|_|\___|\__| +# format_dict = {"Gaussian94": None, "GAMESS-US": parse_basis_data_gamess_us, @@ -90,71 +182,13 @@ format_dict = {"Gaussian94": None, "deMon2k": None, "AcesII": None} -# _ _ _ _ _ _ _ _ _ -# | | | | | | | ( | ) | ( | ) -# | |_| | __ _ _ __ __| | | ___ V V| | V V -# | _ |/ _` | '_ \ / _` | |/ _ \ | | -# | | | | (_| | | | | (_| | | __/ | |____ -# \_| |_/\__,_|_| |_|\__,_|_|\___| \_____/ -# -# +# _____ _ _ _ _ +# / ___| | | | (_) | | +# \ `--. _ _ _ __ ___ _ __ ___ ___| |_ _ __ _ _ __| |_ ___| |_ +# `--. \ | | | '_ ` _ \| '_ ` _ \ / _ \ __| '__| | | | / _` | |/ __| __| +# /\__/ / |_| | | | | | | | | | | | __/ |_| | | |_| | | (_| | | (__| |_ +# \____/ \__, |_| |_| |_|_| |_| |_|\___|\__|_| \__, | \__,_|_|\___|\__| +# __/ | __/ | +# |___/ |___/ - -def handle_f_gamess_us(l_atom_basis, list_symetry): - """ - Read l_atom_basis, if "L" orbital before "D" one, split them into S and P - """ - - l_data = [] - for atom_basis in l_atom_basis: - - # Split the data in line - l_line_raw = atom_basis.split("\n") - l_line = [l_line_raw[0]] - # l_line_raw[0] containt the name of the Atom - - maybe_good_l = True - - for symmetry, begin, end in list_symetry(l_line_raw): - - if maybe_good_l and symmetry in "L": - - body_s = [] - body_p = [] - - for i_l in l_line_raw[begin + 1:end]: - - # one L => S & P - a = i_l.split() - - common = "{:>3}".format(a[0]) - common += "{:>15.7f}".format(float(a[1])) - - tail_s = common + "{:>23.7f}".format(float(a[2])) - body_s.append(tail_s) - - # Maybe only One coefficient for L function - # I guess it mean S and L are equal - try: - tail_p = common + "{:>23.7f}".format(float(a[3])) - except IndexError: - tail_p = tail_s - finally: - body_p.append(tail_p) - - l_line += [l_line_raw[begin].replace("L", "S")] - l_line += body_s - - l_line += [l_line_raw[begin].replace("L", "P")] - l_line += body_p - else: - l_line += l_line_raw[begin:end] - - if symmetry not in ["S", "P", "L"]: - maybe_good_l = False - - l_data.append("\n".join(l_line)) - - return l_data - -handle_f_dict = {"GAMESS-US": handle_f_gamess_us} +symmetry_dict = {"GAMESS-US": l_symmetry_gamess_us}