10
0
mirror of https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local synced 2024-11-01 03:33:46 +01:00
This commit is contained in:
Thomas Applencourt 2015-03-16 09:22:15 +01:00
parent 2245fc2a0f
commit 4f8c21368e

View File

@ -24,7 +24,8 @@ def install_with_pip(name):
import pip import pip
pip.main(['install', name]) pip.main(['install', name])
except: except:
print "You need pip, (http://pip.readthedocs.org/en/latest/installing.html)" print "You need pip"
print "(http://pip.readthedocs.org/en/latest/installing.html)"
sys.exit(1) sys.exit(1)
@ -87,13 +88,13 @@ class EMSL_dump:
return dict_ele return dict_ele
def dwl_basis_list_raw(self): def dwl_basis_list_raw(self):
"""Return the source code of the iframe who contains the list of the basis set available""" """Return the source code of the iframe
who contains the list of the basis set available"""
print "Download all the name available in EMSL. It can take some time.", print "Download all the name available in EMSL."
print "It can take some time.",
sys.stdout.flush() sys.stdout.flush()
"""Download the source code of the iframe who contains the list of the basis set available"""
url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content" url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
if self.debug: if self.debug:
import cPickle as pickle import cPickle as pickle
@ -114,8 +115,10 @@ class EMSL_dump:
return page return page
def basis_list_raw_to_array(self, data_raw): def basis_list_raw_to_array(self, data_raw):
"""Parse the raw html basis set to create a dict will all the information for dowloanding the database : """Parse the raw html basis set to create a dict
Return d[name] = [name, xml_path, description, lits of the elements available] will all the information for dowloanding the database :
Return d[name] = [name, xml_path, description,
lits of the elements available]
Explanation of tuple data from 'tup' by index: Explanation of tuple data from 'tup' by index:
@ -146,8 +149,8 @@ class EMSL_dump:
tup = eval(s) tup = eval(s)
# non-published (e.g. rejected) basis sets should be ignored # non-published (e.g. rejected) basis sets should be ignored
if tup[4] != "published": if tup[4] != "published":
continue continue
xml_path = tup[0] xml_path = tup[0]
name = tup[1] name = tup[1]
@ -175,11 +178,11 @@ class EMSL_dump:
raise Exception("WARNING not DATA") raise Exception("WARNING not DATA")
else: else:
dict_replace = {"PHOSPHOROUS": "PHOSPHORUS", dict_replace = {"PHOSPHOROUS": "PHOSPHORUS",
"D+": "E+", "D+": "E+",
"D-": "E-"} "D-": "E-"}
for k, v in dict_replace.iteritems(): for k, v in dict_replace.iteritems():
data = data.replace(k, v) data = data.replace(k, v)
data = data[b + 5:e - 1].split('\n\n') data = data[b + 5:e - 1].split('\n\n')
@ -249,7 +252,8 @@ class EMSL_dump:
q_out = Queue.Queue(num_worker_threads) q_out = Queue.Queue(num_worker_threads)
def worker(): def worker():
"""get a Job from the q_in, do stuff, when finish put it in the q_out""" """get a Job from the q_in, do stuff,
when finish put it in the q_out"""
while True: while True:
name, path_xml, des, elts = q_in.get() name, path_xml, des, elts = q_in.get()
@ -265,8 +269,11 @@ class EMSL_dump:
while attemps < attemps_max: while attemps < attemps_max:
text = self.requests.get(url, params=params).text text = self.requests.get(url, params=params).text
try: try:
basis_data = self.parse_basis_data_gamess_us(text, basis_data = self.parse_basis_data_gamess_us(
name, des, elts) text,
name,
des,
elts)
except: except:
time.sleep(0.1) time.sleep(0.1)
attemps += 1 attemps += 1
@ -297,14 +304,14 @@ class EMSL_dump:
t.daemon = True t.daemon = True
t.start() t.start()
nb_basis = len(list_basis_array) nb_basis = len(dict_basis_list)
for i in range(nb_basis): for i in range(nb_basis):
name, des, basis_data = q_out.get() name, des, basis_data = q_out.get()
q_out.task_done() q_out.task_done()
try: try:
cmd = "INSERT INTO basis_tab(name,description) VALUES (?,?)" cmd = "INSERT INTO basis_tab(name,description) VALUES (?,?)"
c.execute(cmd, [name, des]) c.execute(cmd, [name, des])
conn.commit() conn.commit()
except sqlite3.IntegrityError: except sqlite3.IntegrityError:
@ -312,7 +319,7 @@ class EMSL_dump:
id_ = [c.lastrowid] id_ = [c.lastrowid]
try: try:
cmd = "INSERT INTO data_tab VALUES (?,?,?)" cmd = "INSERT INTO data_tab VALUES (?,?,?)"
c.executemany(cmd, [id_ + k for k in basis_data]) c.executemany(cmd, [id_ + k for k in basis_data])
conn.commit() conn.commit()
print '{:>3}'.format(i + 1), "/", nb_basis, name print '{:>3}'.format(i + 1), "/", nb_basis, name