10
0
mirror of https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local synced 2024-11-01 03:33:46 +01:00

Merge pull request #1 from scemama/master

Parallel creation of db and changing CLI
This commit is contained in:
TApplencourt 2014-11-12 16:32:40 +01:00
commit 7875f5a576
2 changed files with 98 additions and 20 deletions

View File

@ -4,29 +4,39 @@
"""EMSL Api. """EMSL Api.
Usage: Usage:
EMSL_api.py get_basis <db_path> EMSL_api.py get_list_basis <db_path>
EMSL_api.py get_list_elements <db_path> <basis_name> EMSL_api.py get_list_elements <db_path> <basis_name>
EMSL_api.py get_basis_data <db_path> <basis_name> <elts>... EMSL_api.py get_basis_data <db_path> <basis_name> <elts>...
EMSL_api.py create_db <db_path> <format> <contraction> EMSL_api.py get_list_formats
EMSL_api.py create_db <db_path> <format> [--no-contraction]
EMSL_api.py (-h | --help) EMSL_api.py (-h | --help)
EMSL_api.py --version EMSL_api.py --version
Options: Options:
-h --help Show this screen. -h --help Show this screen.
--version Show version. --version Show version.
--no-contraction Basis functions are not contracted
<db_path> is the path to the SQLite3 file containing the Basis sets.
""" """
version="0.1.1"
import sys import sys
sys.path.append('./src/') sys.path.append('./src/')
from docopt import docopt from docopt import docopt
from EMS_utility import EMSL_dump from EMSL_utility import EMSL_dump
from EMS_utility import EMSL_local from EMSL_utility import format_dict
from EMSL_utility import EMSL_local
if __name__ == '__main__': if __name__ == '__main__':
arguments = docopt(__doc__, version='EMSL Api 0.1')
if arguments["get_basis"]: arguments = docopt(__doc__, version='EMSL Api '+version)
print arguments
if arguments["get_list_basis"]:
db_path = arguments["<db_path>"] db_path = arguments["<db_path>"]
e = EMSL_local(db_path=db_path) e = EMSL_local(db_path=db_path)
@ -54,13 +64,21 @@ if __name__ == '__main__':
l = e.get_basis(basis_name, elts) l = e.get_basis(basis_name, elts)
for i in l: for i in l:
print i,'\n'
elif arguments["get_list_formats"]:
for i in format_dict:
print i print i
elif arguments["create_db"]: elif arguments["create_db"]:
db_path = arguments["<db_path>"] db_path = arguments["<db_path>"]
format = arguments["<format>"] format = arguments["<format>"]
contraction = arguments["<contraction>"] if format not in format_dict:
print "Format %s doesn't exist. Run get_list_formats to get the list of formats."%(format)
sys.exit(1)
contraction = not arguments["--no-contraction"]
print "go" print "go"
e = EMSL_dump(db_path=db_path, format=format, contraction=contraction) e = EMSL_dump(db_path=db_path, format=format_dict[format], contraction=contraction)
e.new_db() e.new_db()

View File

@ -2,15 +2,16 @@
import sqlite3 import sqlite3
import re import re
import sys import sys, os
debug=True
class EMSL_dump: class EMSL_dump:
def __init__(self, db_path=None, format="GAMESS-US", contraction="True"): def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
self.db_path = db_path self.db_path = db_path
self.format = format self.format = format
self.contraction = contraction self.contraction = str(contraction)
import requests import requests
self.requests = requests self.requests = requests
@ -26,6 +27,19 @@ class EMSL_dump:
"""Download the source code of the iframe who contains the list of the basis set available""" """Download the source code of the iframe who contains the list of the basis set available"""
url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content" url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
if debug:
import cPickle as pickle
dbcache = 'db/cache'
if not os.path.isfile(dbcache):
page = self.requests.get(url).text
file=open(dbcache,'w')
pickle.dump(page,file)
else:
file=open(dbcache,'r')
page = pickle.load(file)
file.close()
else:
page = self.requests.get(url).text page = self.requests.get(url).text
print "Done" print "Done"
@ -104,24 +118,51 @@ class EMSL_dump:
c.execute('''CREATE TABLE all_value c.execute('''CREATE TABLE all_value
(name text, description text, elt text, data text)''') (name text, description text, elt text, data text)''')
for i, [name, url, des, elts] in enumerate(list_basis_array): import Queue
import threading
print i, [name, url, des, elts] num_worker_threads=7
q_in = Queue.Queue(num_worker_threads)
q_out = Queue.Queue(num_worker_threads)
basis_raw = {}
def worker():
while True:
[name, url, des, elts] = q_in.get()
url = self.create_url(url, name, elts) url = self.create_url(url, name, elts)
basis_raw = self.requests.get(url).text q_out.put ( ([name, url, des, elts], self.requests.get(url).text) )
q_in.task_done()
def enqueue():
for [name, url, des, elts] in list_basis_array:
q_in.put( ([name, url, des, elts]) )
return 0
t = threading.Thread(target=enqueue)
t.daemon = True
t.start()
for i in range(num_worker_threads):
t = threading.Thread(target=worker)
t.daemon = True
t.start()
for i in range(len(list_basis_array)):
[name, url, des, elts], basis_raw = q_out.get()
try: try:
basis_data = self.basis_data_row_to_array( basis_data = self.basis_data_row_to_array(
basis_raw, name, des, elts) basis_raw, name, des, elts)
c.executemany( c.executemany(
"INSERT INTO all_value VALUES (?,?,?,?)", basis_data) "INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
conn.commit() conn.commit()
print "Done" print i, name
except: except:
print name, url, des, elts
pass pass
conn.close() conn.close()
q_in.join()
q_out.join()
def new_db(self): def new_db(self):
"""Create new_db from scratch""" """Create new_db from scratch"""
@ -156,7 +197,7 @@ class EMSL_local:
conn = sqlite3.connect(self.db_path) conn = sqlite3.connect(self.db_path)
c = conn.cursor() c = conn.cursor()
c.execute("SELECT DISTINCT elt from all_value WHERE name=:name_us", c.execute("SELECT DISTINCT elt from all_value WHERE name=:name_us COLLATE NOCASE",
{"name_us": basis_name}) {"name_us": basis_name})
data = c.fetchall() data = c.fetchall()
@ -174,7 +215,7 @@ class EMSL_local:
d = [] d = []
for elt in elts: for elt in elts:
c.execute("SELECT DISTINCT data from all_value WHERE name=:name_cur AND elt=:elt_cur", c.execute("SELECT DISTINCT data from all_value WHERE name=:name_cur COLLATE NOCASE AND elt=:elt_cur COLLATE NOCASE",
{"name_cur": basis_name, {"name_cur": basis_name,
"elt_cur": elt}) "elt_cur": elt})
@ -184,6 +225,25 @@ class EMSL_local:
conn.close() conn.close()
return d return d
# Lookup table of output formats: lower-case keyword -> format name.
# The command-line front end validates the user-supplied <format> against
# these keys and hands the mapped value to EMSL_dump as its `format` argument.
format_dict = {
    "g94": "Gaussian94",
    "gamess-us": "GAMESS-US",
    "gamess-uk": "GAMESS-UK",
    "turbomole": "Turbomole",
    "tx93": "TX93",
    "molpro": "Molpro",
    "molproint": "MolproInt",
    "hondo": "Hondo",
    "supermolecule": "SuperMolecule",
    "molcas": "Molcas",
    "hyperchem": "HyperChem",
    "dalton": "Dalton",
    "demon-ks": "deMon-KS",
    "demon2k": "deMon2k",
    "aces2": "AcesII",
}
if __name__ == "__main__": if __name__ == "__main__":
e = EMSL_local(db_path="EMSL.db") e = EMSL_local(db_path="EMSL.db")