mirror of
https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local
synced 2025-01-08 20:33:13 +01:00
Handling of duplicate names and errors during download
This commit is contained in:
parent
86b8faabe7
commit
17f3b0b500
17
EMSL_api.py
17
EMSL_api.py
@ -20,7 +20,7 @@ Options:
|
|||||||
<db_path> is the path to the SQLite3 file containing the Basis sets.
|
<db_path> is the path to the SQLite3 file containing the Basis sets.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
version="0.1.1"
|
version = "0.1.1"
|
||||||
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
@ -33,8 +33,8 @@ from EMSL_utility import EMSL_local
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
arguments = docopt(__doc__, version='EMSL Api '+version)
|
arguments = docopt(__doc__, version='EMSL Api ' + version)
|
||||||
print arguments
|
# print arguments
|
||||||
|
|
||||||
if arguments["get_list_basis"]:
|
if arguments["get_list_basis"]:
|
||||||
db_path = arguments["<db_path>"]
|
db_path = arguments["<db_path>"]
|
||||||
@ -64,7 +64,7 @@ if __name__ == '__main__':
|
|||||||
|
|
||||||
l = e.get_basis(basis_name, elts)
|
l = e.get_basis(basis_name, elts)
|
||||||
for i in l:
|
for i in l:
|
||||||
print i,'\n'
|
print i, '\n'
|
||||||
|
|
||||||
elif arguments["get_list_formats"]:
|
elif arguments["get_list_formats"]:
|
||||||
for i in format_dict:
|
for i in format_dict:
|
||||||
@ -74,11 +74,10 @@ if __name__ == '__main__':
|
|||||||
db_path = arguments["<db_path>"]
|
db_path = arguments["<db_path>"]
|
||||||
format = arguments["<format>"]
|
format = arguments["<format>"]
|
||||||
if format not in format_dict:
|
if format not in format_dict:
|
||||||
print "Format %s doesn't exist. Run get_list_formats to get the list of formats."%(format)
|
print "Format %s doesn't exist. Run get_list_formats to get the list of formats." % (format)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
contraction = not arguments["--no-contraction"]
|
contraction = not arguments["--no-contraction"]
|
||||||
|
|
||||||
print "go"
|
e = EMSL_dump(
|
||||||
e = EMSL_dump(db_path=db_path, format=format_dict[format], contraction=contraction)
|
db_path=db_path, format=format_dict[format], contraction=contraction)
|
||||||
e.new_db()
|
e.new_db()
|
||||||
|
|
||||||
|
@ -4,8 +4,9 @@ import sqlite3
|
|||||||
import re
|
import re
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
debug = True
|
debug = False
|
||||||
|
|
||||||
|
|
||||||
class EMSL_dump:
|
class EMSL_dump:
|
||||||
@ -23,7 +24,7 @@ class EMSL_dump:
|
|||||||
self.db_path = path
|
self.db_path = path
|
||||||
|
|
||||||
def dwl_basis_list_raw(self):
|
def dwl_basis_list_raw(self):
|
||||||
print "Dwl the basis list info",
|
print "Download all the name available in EMSL. It can take some time.",
|
||||||
sys.stdout.flush()
|
sys.stdout.flush()
|
||||||
|
|
||||||
"""Download the source code of the iframe who contains the list of the basis set available"""
|
"""Download the source code of the iframe who contains the list of the basis set available"""
|
||||||
@ -45,14 +46,13 @@ class EMSL_dump:
|
|||||||
page = self.requests.get(url).text
|
page = self.requests.get(url).text
|
||||||
|
|
||||||
print "Done"
|
print "Done"
|
||||||
|
|
||||||
return page
|
return page
|
||||||
|
|
||||||
def bl_raw_to_array(self, data_raw):
|
def bl_raw_to_array(self, data_raw):
|
||||||
"""Parse the raw html to create a basis set array whith all the info:
|
"""Parse the raw html to create a basis set array whith all the info:
|
||||||
url, name,description"""
|
url, name,description"""
|
||||||
|
|
||||||
d = []
|
d = {}
|
||||||
|
|
||||||
for line in data_raw.split('\n'):
|
for line in data_raw.split('\n'):
|
||||||
if "new basisSet(" in line:
|
if "new basisSet(" in line:
|
||||||
@ -72,11 +72,15 @@ class EMSL_dump:
|
|||||||
|
|
||||||
if "-ecp" in url.lower():
|
if "-ecp" in url.lower():
|
||||||
continue
|
continue
|
||||||
|
d[name] = [name, url, des, elts]
|
||||||
|
|
||||||
d.append([name, url, des, elts])
|
"""Tric for the unicity of the name"""
|
||||||
|
array = [d[key] for key in d]
|
||||||
|
|
||||||
d_sort = sorted(d, key=lambda x: x[0])
|
array_sort = sorted(array, key=lambda x: x[0])
|
||||||
return d_sort
|
print len(array_sort), "basisset will be download"
|
||||||
|
|
||||||
|
return array_sort
|
||||||
|
|
||||||
def create_url(self, url, name, elts):
|
def create_url(self, url, name, elts):
|
||||||
"""Create the adequate url to get the basis data"""
|
"""Create the adequate url to get the basis data"""
|
||||||
@ -100,7 +104,8 @@ class EMSL_dump:
|
|||||||
b = data.find("$DATA")
|
b = data.find("$DATA")
|
||||||
e = data.find("$END")
|
e = data.find("$END")
|
||||||
if (b == -1 or data.find("$DATA$END") != -1):
|
if (b == -1 or data.find("$DATA$END") != -1):
|
||||||
print data
|
if debug:
|
||||||
|
print data
|
||||||
raise StandardError("WARNING not DATA")
|
raise StandardError("WARNING not DATA")
|
||||||
else:
|
else:
|
||||||
data = data[b + 5:e].split('\n\n')
|
data = data[b + 5:e].split('\n\n')
|
||||||
@ -124,22 +129,34 @@ class EMSL_dump:
|
|||||||
import threading
|
import threading
|
||||||
|
|
||||||
num_worker_threads = 7
|
num_worker_threads = 7
|
||||||
|
num_try_of_dwl = 2
|
||||||
|
|
||||||
q_in = Queue.Queue(num_worker_threads)
|
q_in = Queue.Queue(num_worker_threads)
|
||||||
q_out = Queue.Queue(num_worker_threads)
|
q_out = Queue.Queue(num_worker_threads)
|
||||||
|
|
||||||
basis_raw = {}
|
|
||||||
|
|
||||||
def worker():
|
def worker():
|
||||||
|
"""get a Job from the q_in, do stuff, when finish put it in the q_out"""
|
||||||
while True:
|
while True:
|
||||||
[name, url, des, elts] = q_in.get()
|
[name, url, des, elts] = q_in.get()
|
||||||
url = self.create_url(url, name, elts)
|
url = self.create_url(url, name, elts)
|
||||||
q_out.put(
|
|
||||||
([name, url, des, elts], self.requests.get(url).text))
|
for i in range(num_try_of_dwl):
|
||||||
|
text = self.requests.get(url).text
|
||||||
|
try:
|
||||||
|
basis_data = self.basis_data_row_to_array(
|
||||||
|
text, name, des, elts)
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
time.sleep(0.1)
|
||||||
|
pass
|
||||||
|
|
||||||
|
q_out.put(([name, url, des, elts], basis_data))
|
||||||
q_in.task_done()
|
q_in.task_done()
|
||||||
|
|
||||||
def enqueue():
|
def enqueue():
|
||||||
for [name, url, des, elts] in list_basis_array:
|
for [name, url, des, elts] in list_basis_array:
|
||||||
q_in.put(([name, url, des, elts]))
|
q_in.put(([name, url, des, elts]))
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
t = threading.Thread(target=enqueue)
|
t = threading.Thread(target=enqueue)
|
||||||
@ -151,21 +168,24 @@ class EMSL_dump:
|
|||||||
t.daemon = True
|
t.daemon = True
|
||||||
t.start()
|
t.start()
|
||||||
|
|
||||||
for i in range(len(list_basis_array)):
|
nb_basis = len(list_basis_array)
|
||||||
[name, url, des, elts], basis_raw = q_out.get()
|
|
||||||
|
for i in range(nb_basis):
|
||||||
|
[name, url, des, elts], basis_data = q_out.get()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
basis_data = self.basis_data_row_to_array(
|
|
||||||
basis_raw, name, des, elts)
|
|
||||||
c.executemany(
|
c.executemany(
|
||||||
"INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
|
"INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
print i, name
|
|
||||||
|
print '{:>3}'.format(i + 1), "/", nb_basis, name
|
||||||
except:
|
except:
|
||||||
print name, url, des, elts
|
print '{:>3}'.format(i + 1), "/", nb_basis, name, "fail",
|
||||||
pass
|
print ' ', [url, des, elts]
|
||||||
|
raise
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
q_in.join()
|
q_in.join()
|
||||||
q_out.join()
|
|
||||||
|
|
||||||
def new_db(self):
|
def new_db(self):
|
||||||
"""Create new_db from scratch"""
|
"""Create new_db from scratch"""
|
||||||
|
Loading…
Reference in New Issue
Block a user