From ec0fc43bf8088b8c698f5deadf7841dc410ec3da Mon Sep 17 00:00:00 2001 From: Thomas Applencourt Date: Mon, 16 Mar 2015 19:10:55 +0100 Subject: [PATCH] Add format in db --- EMSL_api.py | 14 ++-- db/Gamess-us.db | Bin 29757440 -> 29758464 bytes src/EMSL_dump.py | 6 +- src/EMSL_local.py | 101 +++++++-------------------- src/parser.py | 170 +++++++++++++++++++++++++++------------------- 5 files changed, 136 insertions(+), 155 deletions(-) diff --git a/EMSL_api.py b/EMSL_api.py index 86e1370..623ecb1 100755 --- a/EMSL_api.py +++ b/EMSL_api.py @@ -5,14 +5,12 @@ Usage: EMSL_api.py list_basis [--basis=...] - --format= [--atom=...] [--db_path=] [--average_mo_number] EMSL_api.py list_atoms --basis= [--db_path=] EMSL_api.py get_basis_data --basis= - --format= [--atom=...] [--db_path=] [(--save [--path=])] @@ -77,7 +75,7 @@ if __name__ == '__main__': # \_____/_|___/\__| \____/ \__,_|___/_|___/ if arguments["list_basis"]: - e = EMSL_local(db_path=db_path, format=arguments["--format"]) + e = EMSL_local(db_path=db_path) elts = arguments["--atom"] @@ -100,7 +98,7 @@ if __name__ == '__main__': # | | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __| # | |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \ # \_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/ - if arguments["list_atoms"]: + elif arguments["list_atoms"]: e = EMSL_local(db_path=db_path) basis_name = arguments["--basis"] @@ -113,8 +111,8 @@ if __name__ == '__main__': # | ___ \/ _` / __| / __| / _` |/ _` | __/ _` | # | |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| | # \____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_| - if arguments["get_basis_data"]: - e = EMSL_local(db_path=db_path, format=arguments["--format"]) + elif arguments["get_basis_data"]: + e = EMSL_local(db_path=db_path) basis_name = arguments["--basis"][0] elts = arguments["--atom"] @@ -144,7 +142,7 @@ if __name__ == '__main__': # | | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __| # | |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \ # \_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/ - if arguments["list_formats"]: + elif arguments["list_formats"]: e = EMSL_dump() for i in e.get_list_format(): print i @@ -155,7 +153,7 @@ if __name__ == '__main__': # | | | '__/ _ \/ _` | __/ _ \ / _` | '_ \ # | \__/\ | | __/ (_| | || __/ | (_| | |_) | # \____/_| \___|\__,_|\__\___| \__,_|_.__/ - if arguments["create_db"]: + elif arguments["create_db"]: db_path = arguments["--db_path"] format = arguments["--format"] diff --git a/db/Gamess-us.db b/db/Gamess-us.db index cf70e8825fdfab63c54874dfac475b2ae9584e26..50aa1e5e20367c60e0d05bd5a89db0ef0f87e9cb 100644 GIT binary patch delta 1733 zcmYM#ceED*7{>AY`=z9G35h~Cvx&-{-RhP$-CHi9Z&S3C$kzJO5T#OCg^*1lA(`2G z@4dI|{W$*UJ?C@IbI$w6d(QLEJ8{Xv(uvC!mdS-Qv}(p9=icj+NLrI+-UL*+2(Bb8DmeWjlqF4b~`94Yyxj<&i9Jx^D$~>7b7s&!yD2wD`xkN6N%j9xd zEKB4Hxl*o@rE;}gBiG6@xlXQ^8{|g0Np6-~RJR{G_bMm~rATP>G^0K@lugYuky1XH8 z%3Jcbyd&?*d-A?~ARo#{^09m(pUP+QxqKmC%2)EWd?Vk=ck;dbAnW8u`AL43U*uQ$ zO@5a@XQtO>7(6MUyCtrm=nO5Ie?Bv2*Ma&7ygNp~fjQ(*{9398RfcUTffiWls$B-Br!{XQ&9>>M;F(O9Bs2Ck%;)EC*<3u8ZsAhPW|qikst>xHWEz z<*_1ek2~VdSQ&T4s<=DuiF;#p+!t%&{&*lBjECakc%-nJwS}d1YZr?nN{Y*iBUaY0 z>eHaOU`gX*w=Na6YBg-ywAgq2z(FI24jVIW)WGorlGmrPcI;D8UR6<6RoeS0aD%4n4Qe)l!M^=_K>-8T-O9N>r$4VnPP8v%SIbNDdGifd@q@|o7t>i>GNlunib(yAai7{+$i(pCb?N|kz3_9nJ>4?0=Yx(l)Gf1 zERw}?w=9vRvP_oCJ+eYp$||{6?vvHBM%K#xvQE~^1F}Ij$|iYGHp@e@MIM%|vQ4(j z4%sQYPLfU7{^AVI4&ATlQ=$_Mzd%h zEuv+d5Ut|GI4Mq!Q{vQU9c`j*w2Ss}TAUsoqGOy9o#M>s99^PooE61!c65t#qI>j+ zo^fuR7ro-Y|GlG6oF9FoUtACuM*kQP7sbFB6oX?(42@wiJVwODF)~KQB{4e2#Mrnr zE{kz7J|@KFF)=2^^q3LX#&t0>X2tBdK5mFPF*k0Ed2v(R z9Jj=+aa+uf+hal85qHL2u`m|J;R1zN*Img u5F2AtJQ$nfq1X}+$JW>u+ha%UEZEGhf`w(*7uGAYs^pe6MY{?*6#NUpO?Hd` diff --git a/src/EMSL_dump.py b/src/EMSL_dump.py index 184b6d5..f26fa1b 100644 --- a/src/EMSL_dump.py +++ b/src/EMSL_dump.py @@ -189,6 +189,10 @@ class EMSL_dump: REFERENCES basis_tab(basis_id) );''') + c.execute('''CREATE TABLE format_tab(format TEXT)''') + c.execute('''INSERT INTO format_tab VALUES (?)''', [self.format]) + conn.commit() + c.execute(''' CREATE VIEW output_tab AS SELECT basis_id, name, @@ -229,8 +233,6 @@ class EMSL_dump: basis_data = self.parser(text, name, des, elts, self.debug) except: - if self.debug: - raise time.sleep(0.1) attemps += 1 else: diff --git a/src/EMSL_local.py b/src/EMSL_local.py index 806b5c3..75e7669 100755 --- a/src/EMSL_local.py +++ b/src/EMSL_local.py @@ -70,14 +70,14 @@ def cond_sql_or(table_name, l_value, glob=False): def string_to_nb_mo(str_type): """Take a string and return the nb of orbital""" - assert len(str_type) == 1 - d = {"S": 1, - "P": 2, - "D": 3} + d = {"S": 3, + "P": 5, + "D": 7, + "SP": 8} if str_type in d: - return 2 * d[str_type] + 1 + return d[str_type] # ord("F") = 70 and ord("Z") = 87 elif 70 <= ord(str_type) <= 87: # ord("F") = 70 and l = 4 so ofset if 66 @@ -127,36 +127,14 @@ class EMSL_local: All the method for using the EMSL db localy """ - def __init__(self, db_path=None, format=None): + def __init__(self, db_path=None): self.db_path = db_path - self.p = re.compile(ur'^(\w)\s+\d+\b') - self.format = format - def get_list_symetry(self, atom_basis): - """ - Return the begin and the end of all the type of orbital - input: atom_basis = [name, ] - output: [ [type, begin, end], ...] - """ - # Example - # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]" + self.conn = sqlite3.connect(self.db_path) + self.c = self.conn.cursor() - l = [] - for i, line in enumerate(atom_basis): - m = re.search(self.p, line) - if m: - l.append([m.group(1), i]) - try: - l[-2].append(i) - except IndexError: - pass - - l[-1].append(i + 1) - - print l - sys.exit() - - return l + self.c.execute("SELECT * from format_tab") + self.format = self.c.fetchone()[0] def get_list_basis_available(self, elts=[], @@ -173,12 +151,6 @@ class EMSL_local: # Else Get name,description # 3) Parse it - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() # ~#~#~#~#~#~ # # F i l t e r # @@ -213,9 +185,9 @@ class EMSL_local: WHERE elt=? AND {0}""".format(cmd_filter_basis) cmd = " INTERSECT ".join([str_] * len(elts)) + ";" - c.execute(cmd, elts) + self.c.execute(cmd, elts) - l_basis_id = [i[0] for i in c.fetchall()] + l_basis_id = [i[0] for i in self.c.fetchall()] # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ # # C r e a t e _ t h e _ c m d # @@ -240,10 +212,8 @@ class EMSL_local: # F e t c h # # ~#~#~#~#~ # - c.execute(cmd) - info = c.fetchall() - - conn.close() + self.c.execute(cmd) + info = self.c.fetchall() # ~#~#~#~#~#~#~ # # P a r s i n g # @@ -254,32 +224,25 @@ class EMSL_local: dict_info = OrderedDict() # Description : dict_info[name] = [description, nb_mo, nb_ele] + from src.parser import symmetry_dict if average_mo_number: - from src.parser import handle_f_dict - try: - f = handle_f_dict[self.format] + l_symmetry = symmetry_dict[self.format] except KeyError: - str_ = " WARNING Cannot handle counting L function in this format" - print >> sys.stderr, str_ + print >> sys.stderr, "You need to add a function in symmetry_dict" + print >> sys.stderr, "for your format ({0})".format(self.format) + sys.exit(1) for name, description, atom_basis in info: - try: - atom_basis = f([atom_basis], self.get_list_symetry) - atom_basis = "\n\n".join(atom_basis) - except UnboundLocalError: - pass - nb_mo = 0 line = atom_basis.split("\n") - for type_, _, _ in self.get_list_symetry(line): + for type_, _, _ in l_symmetry(line): nb_mo += string_to_nb_mo(type_) - try: dict_info[name][1] += nb_mo dict_info[name][2] += 1. @@ -297,13 +260,6 @@ class EMSL_local: def get_list_element_available(self, basis_name): - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - # ~#~#~#~#~#~ # # F i l t e r # # ~#~#~#~#~#~ # @@ -316,14 +272,13 @@ class EMSL_local: # F e t c h # # ~#~#~#~#~ # - c.execute(str_, {"name_us": basis_name}) - conn.close() + self.c.execute(str_, {"name_us": basis_name}) # ~#~#~#~#~#~ # # R e t u r n # # ~#~#~#~#~#~ # - return [str(i[0]) for i in c.fetchall()] + return [str(i[0]) for i in self.c.fetchall()] def get_basis(self, basis_name, elts=None, @@ -332,26 +287,18 @@ class EMSL_local: Return the data from the basis set """ - # ~#~#~#~ # - # I n i t # - # ~#~#~#~ # - - conn = sqlite3.connect(self.db_path) - c = conn.cursor() - # ~#~#~#~#~#~ # # F i l t e r # # ~#~#~#~#~#~ # cmd_filter_ele = " ".join(cond_sql_or("elt", elts)) if elts else "(1)" - c.execute('''SELECT DISTINCT data from output_tab + self.c.execute('''SELECT DISTINCT data from output_tab WHERE name="{0}" AND {1}'''.format(basis_name, cmd_filter_ele)) # We need to take i[0] because fetchall return a tuple [(value),...] - l_atom_basis = [i[0].strip() for i in c.fetchall()] - conn.close() + l_atom_basis = [i[0].strip() for i in self.c.fetchall()] # ~#~#~#~#~#~#~#~ # # h a n d l e _ f # diff --git a/src/parser.py b/src/parser.py index 5fb968a..2320780 100644 --- a/src/parser.py +++ b/src/parser.py @@ -1,4 +1,5 @@ import sys +import os def get_dict_ele(): @@ -72,7 +73,98 @@ def parse_basis_data_gamess_us(data, name, des, elts, debug=False): return [name, des, basis_data] -import os + +import re + +symmetry_regex = re.compile(ur'^(\w)\s+\d+\b') + + +def l_symmetry_gamess_us(atom_basis): + """ + Return the begin and the end of all the type of orbital + input: atom_basis = [name, S 1, 12 0.12 12212, ...] + output: [ [type, begin, end], ...] + """ + # Example + # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]" + + l = [] + for i, line in enumerate(atom_basis): + m = re.search(symmetry_regex, line) + if m: + # Cause of L ! + read_symmetry = m.group(1) + + # L is real L or special SP + # Just check the number of exponant + if read_symmetry == "L" and len(atom_basis[i + 1].split()) == 4: + real_symmetry = "SP" + else: + real_symmetry = read_symmetry + + l.append([real_symmetry, i]) + try: + l[-2].append(i) + except IndexError: + pass + + l[-1].append(i + 1) + return l + + +def handle_f_gamess_us(l_atom_basis): + """ + Read l_atom_basis and change the SP in L and P + """ + + l_data = [] + for atom_basis in l_atom_basis: + + # Split the data in line + l_line_raw = atom_basis.split("\n") + l_line = [l_line_raw[0]] + # l_line_raw[0] containt the name of the Atom + + for symmetry, begin, end in l_symmetry_gamess_us(l_line_raw): + + if symmetry == "SP": + + body_s = [] + body_p = [] + + for i_l in l_line_raw[begin + 1:end]: + + # one L => S & P + a = i_l.split() + + common = "{:>3}".format(a[0]) + common += "{:>15.7f}".format(float(a[1])) + + tail_s = common + "{:>23.7f}".format(float(a[2])) + body_s.append(tail_s) + + tail_p = common + "{:>23.7f}".format(float(a[3])) + body_p.append(tail_p) + + l_line += [l_line_raw[begin].replace("L", "S")] + l_line += body_s + + l_line += [l_line_raw[begin].replace("L", "P")] + l_line += body_p + else: + l_line += l_line_raw[begin:end] + + l_data.append("\n".join(l_line)) + + return l_data + +# ______ _ _ _ _ +# | ___| | | | (_) | | +# | |_ _ __ ___ _ __ ___ __ _| |_ __| |_ ___| |_ +# | _| '__/ _ \| '_ ` _ \ / _` | __| / _` | |/ __| __| +# | | | | | (_) | | | | | | (_| | |_ | (_| | | (__| |_ +# \_| |_| \___/|_| |_| |_|\__,_|\__| \__,_|_|\___|\__| +# format_dict = {"Gaussian94": None, "GAMESS-US": parse_basis_data_gamess_us, @@ -90,71 +182,13 @@ format_dict = {"Gaussian94": None, "deMon2k": None, "AcesII": None} -# _ _ _ _ _ _ _ _ _ -# | | | | | | | ( | ) | ( | ) -# | |_| | __ _ _ __ __| | | ___ V V| | V V -# | _ |/ _` | '_ \ / _` | |/ _ \ | | -# | | | | (_| | | | | (_| | | __/ | |____ -# \_| |_/\__,_|_| |_|\__,_|_|\___| \_____/ -# -# +# _____ _ _ _ _ +# / ___| | | | (_) | | +# \ `--. _ _ _ __ ___ _ __ ___ ___| |_ _ __ _ _ __| |_ ___| |_ +# `--. \ | | | '_ ` _ \| '_ ` _ \ / _ \ __| '__| | | | / _` | |/ __| __| +# /\__/ / |_| | | | | | | | | | | | __/ |_| | | |_| | | (_| | | (__| |_ +# \____/ \__, |_| |_| |_|_| |_| |_|\___|\__|_| \__, | \__,_|_|\___|\__| +# __/ | __/ | +# |___/ |___/ - -def handle_f_gamess_us(l_atom_basis, list_symetry): - """ - Read l_atom_basis, if "L" orbital before "D" one, split them into S and P - """ - - l_data = [] - for atom_basis in l_atom_basis: - - # Split the data in line - l_line_raw = atom_basis.split("\n") - l_line = [l_line_raw[0]] - # l_line_raw[0] containt the name of the Atom - - maybe_good_l = True - - for symmetry, begin, end in list_symetry(l_line_raw): - - if maybe_good_l and symmetry in "L": - - body_s = [] - body_p = [] - - for i_l in l_line_raw[begin + 1:end]: - - # one L => S & P - a = i_l.split() - - common = "{:>3}".format(a[0]) - common += "{:>15.7f}".format(float(a[1])) - - tail_s = common + "{:>23.7f}".format(float(a[2])) - body_s.append(tail_s) - - # Maybe only One coefficient for L function - # I guess it mean S and L are equal - try: - tail_p = common + "{:>23.7f}".format(float(a[3])) - except IndexError: - tail_p = tail_s - finally: - body_p.append(tail_p) - - l_line += [l_line_raw[begin].replace("L", "S")] - l_line += body_s - - l_line += [l_line_raw[begin].replace("L", "P")] - l_line += body_p - else: - l_line += l_line_raw[begin:end] - - if symmetry not in ["S", "P", "L"]: - maybe_good_l = False - - l_data.append("\n".join(l_line)) - - return l_data - -handle_f_dict = {"GAMESS-US": handle_f_gamess_us} +symmetry_dict = {"GAMESS-US": l_symmetry_gamess_us}