mirror of
https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local
synced 2025-01-08 20:33:13 +01:00
Handle duplicate basis-set names and errors during download
This commit is contained in:
parent
86b8faabe7
commit
17f3b0b500
15
EMSL_api.py
15
EMSL_api.py
@ -20,7 +20,7 @@ Options:
|
||||
<db_path> is the path to the SQLite3 file containing the Basis sets.
|
||||
"""
|
||||
|
||||
version="0.1.1"
|
||||
version = "0.1.1"
|
||||
|
||||
|
||||
import sys
|
||||
@ -33,8 +33,8 @@ from EMSL_utility import EMSL_local
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
arguments = docopt(__doc__, version='EMSL Api '+version)
|
||||
print arguments
|
||||
arguments = docopt(__doc__, version='EMSL Api ' + version)
|
||||
# print arguments
|
||||
|
||||
if arguments["get_list_basis"]:
|
||||
db_path = arguments["<db_path>"]
|
||||
@ -64,7 +64,7 @@ if __name__ == '__main__':
|
||||
|
||||
l = e.get_basis(basis_name, elts)
|
||||
for i in l:
|
||||
print i,'\n'
|
||||
print i, '\n'
|
||||
|
||||
elif arguments["get_list_formats"]:
|
||||
for i in format_dict:
|
||||
@ -74,11 +74,10 @@ if __name__ == '__main__':
|
||||
db_path = arguments["<db_path>"]
|
||||
format = arguments["<format>"]
|
||||
if format not in format_dict:
|
||||
print "Format %s doesn't exist. Run get_list_formats to get the list of formats."%(format)
|
||||
print "Format %s doesn't exist. Run get_list_formats to get the list of formats." % (format)
|
||||
sys.exit(1)
|
||||
contraction = not arguments["--no-contraction"]
|
||||
|
||||
print "go"
|
||||
e = EMSL_dump(db_path=db_path, format=format_dict[format], contraction=contraction)
|
||||
e = EMSL_dump(
|
||||
db_path=db_path, format=format_dict[format], contraction=contraction)
|
||||
e.new_db()
|
||||
|
||||
|
@ -4,8 +4,9 @@ import sqlite3
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
debug = True
|
||||
debug = False
|
||||
|
||||
|
||||
class EMSL_dump:
|
||||
@ -23,7 +24,7 @@ class EMSL_dump:
|
||||
self.db_path = path
|
||||
|
||||
def dwl_basis_list_raw(self):
|
||||
print "Dwl the basis list info",
|
||||
print "Download all the name available in EMSL. It can take some time.",
|
||||
sys.stdout.flush()
|
||||
|
||||
"""Download the source code of the iframe who contains the list of the basis set available"""
|
||||
@ -45,14 +46,13 @@ class EMSL_dump:
|
||||
page = self.requests.get(url).text
|
||||
|
||||
print "Done"
|
||||
|
||||
return page
|
||||
|
||||
def bl_raw_to_array(self, data_raw):
|
||||
"""Parse the raw html to create a basis set array whith all the info:
|
||||
url, name,description"""
|
||||
|
||||
d = []
|
||||
d = {}
|
||||
|
||||
for line in data_raw.split('\n'):
|
||||
if "new basisSet(" in line:
|
||||
@ -72,11 +72,15 @@ class EMSL_dump:
|
||||
|
||||
if "-ecp" in url.lower():
|
||||
continue
|
||||
d[name] = [name, url, des, elts]
|
||||
|
||||
d.append([name, url, des, elts])
|
||||
"""Tric for the unicity of the name"""
|
||||
array = [d[key] for key in d]
|
||||
|
||||
d_sort = sorted(d, key=lambda x: x[0])
|
||||
return d_sort
|
||||
array_sort = sorted(array, key=lambda x: x[0])
|
||||
print len(array_sort), "basisset will be download"
|
||||
|
||||
return array_sort
|
||||
|
||||
def create_url(self, url, name, elts):
|
||||
"""Create the adequate url to get the basis data"""
|
||||
@ -100,6 +104,7 @@ class EMSL_dump:
|
||||
b = data.find("$DATA")
|
||||
e = data.find("$END")
|
||||
if (b == -1 or data.find("$DATA$END") != -1):
|
||||
if debug:
|
||||
print data
|
||||
raise StandardError("WARNING not DATA")
|
||||
else:
|
||||
@ -124,22 +129,34 @@ class EMSL_dump:
|
||||
import threading
|
||||
|
||||
num_worker_threads = 7
|
||||
num_try_of_dwl = 2
|
||||
|
||||
q_in = Queue.Queue(num_worker_threads)
|
||||
q_out = Queue.Queue(num_worker_threads)
|
||||
|
||||
basis_raw = {}
|
||||
|
||||
def worker():
|
||||
"""get a Job from the q_in, do stuff, when finish put it in the q_out"""
|
||||
while True:
|
||||
[name, url, des, elts] = q_in.get()
|
||||
url = self.create_url(url, name, elts)
|
||||
q_out.put(
|
||||
([name, url, des, elts], self.requests.get(url).text))
|
||||
|
||||
for i in range(num_try_of_dwl):
|
||||
text = self.requests.get(url).text
|
||||
try:
|
||||
basis_data = self.basis_data_row_to_array(
|
||||
text, name, des, elts)
|
||||
break
|
||||
except:
|
||||
time.sleep(0.1)
|
||||
pass
|
||||
|
||||
q_out.put(([name, url, des, elts], basis_data))
|
||||
q_in.task_done()
|
||||
|
||||
def enqueue():
|
||||
for [name, url, des, elts] in list_basis_array:
|
||||
q_in.put(([name, url, des, elts]))
|
||||
|
||||
return 0
|
||||
|
||||
t = threading.Thread(target=enqueue)
|
||||
@ -151,21 +168,24 @@ class EMSL_dump:
|
||||
t.daemon = True
|
||||
t.start()
|
||||
|
||||
for i in range(len(list_basis_array)):
|
||||
[name, url, des, elts], basis_raw = q_out.get()
|
||||
nb_basis = len(list_basis_array)
|
||||
|
||||
for i in range(nb_basis):
|
||||
[name, url, des, elts], basis_data = q_out.get()
|
||||
|
||||
try:
|
||||
basis_data = self.basis_data_row_to_array(
|
||||
basis_raw, name, des, elts)
|
||||
c.executemany(
|
||||
"INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
|
||||
conn.commit()
|
||||
print i, name
|
||||
|
||||
print '{:>3}'.format(i + 1), "/", nb_basis, name
|
||||
except:
|
||||
print name, url, des, elts
|
||||
pass
|
||||
print '{:>3}'.format(i + 1), "/", nb_basis, name, "fail",
|
||||
print ' ', [url, des, elts]
|
||||
raise
|
||||
conn.close()
|
||||
|
||||
q_in.join()
|
||||
q_out.join()
|
||||
|
||||
def new_db(self):
|
||||
"""Create new_db from scratch"""
|
||||
|
Loading…
Reference in New Issue
Block a user