mirror of
https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local
synced 2024-12-22 04:13:43 +01:00
Merge pull request #3 from TApplencourt/master
Major change ! Read descritpion
This commit is contained in:
commit
839391ffe7
175
EMSL_api.py
175
EMSL_api.py
@ -4,19 +4,22 @@
|
||||
"""EMSL Api.
|
||||
|
||||
Usage:
|
||||
EMSL_api.py list_basis [--atom=atom_name...]
|
||||
[--db_path=db_path]
|
||||
EMSL_api.py list_atoms --basis=basis_name
|
||||
[--db_path=db_path]
|
||||
EMSL_api.py get_basis_data --basis=basis_name
|
||||
[--atom=atom_name...]
|
||||
[--db_path=db_path]
|
||||
[--with_l]
|
||||
[(--save [--path=path])]
|
||||
EMSL_api.py list_basis [--basis=<basis_name>...]
|
||||
[--atom=<atom_name>...]
|
||||
[--db_path=<db_path>]
|
||||
[--average_mo_number]
|
||||
EMSL_api.py list_atoms --basis=<basis_name>
|
||||
[--db_path=<db_path>]
|
||||
EMSL_api.py get_basis_data --basis=<basis_name>
|
||||
[--atom=<atom_name>...]
|
||||
[--db_path=<db_path>]
|
||||
[(--save [--path=<path>])]
|
||||
[--check=<program_name>]
|
||||
[--treat_l]
|
||||
EMSL_api.py list_formats
|
||||
EMSL_api.py create_db --db_path=db_path
|
||||
--format=format
|
||||
[--no-contraction]
|
||||
EMSL_api.py create_db --format=<format>
|
||||
[--db_path=<db_path>]
|
||||
[--no-contraction]
|
||||
EMSL_api.py (-h | --help)
|
||||
EMSL_api.py --version
|
||||
|
||||
@ -27,75 +30,100 @@ Options:
|
||||
|
||||
<db_path> is the path to the SQLite3 file containing the Basis sets.
|
||||
By default is $EMSL_API_ROOT/db/Gausian_uk.db
|
||||
|
||||
Example of use:
|
||||
./EMSL_api.py list_basis --atom Al --atom U
|
||||
./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
|
||||
./EMSL_api.py list_atoms --basis ANO-RCC
|
||||
./EMSL_api.py get_basis_data --basis 3-21++G*
|
||||
"""
|
||||
|
||||
version = "0.2.0"
|
||||
version = "0.8.1"
|
||||
|
||||
import sys
|
||||
import os
|
||||
|
||||
from src.docopt import docopt
|
||||
from src.EMSL_utility import EMSL_dump
|
||||
from src.EMSL_utility import format_dict
|
||||
from src.EMSL_utility import EMSL_local
|
||||
from src.misc.docopt import docopt
|
||||
from src.EMSL_dump import EMSL_dump
|
||||
from src.EMSL_local import EMSL_local, checkSQLite3
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
arguments = docopt(__doc__, version='EMSL Api ' + version)
|
||||
|
||||
# ___
|
||||
# | ._ o _|_
|
||||
# _|_ | | | |_
|
||||
#
|
||||
|
||||
if arguments["--db_path"]:
|
||||
db_path = arguments["--db_path"]
|
||||
else:
|
||||
import os
|
||||
db_path = os.path.dirname(__file__) + "/db/Gamess-us.db"
|
||||
db_path = os.path.dirname(__file__) + "/db/GAMESS-US.db"
|
||||
|
||||
# Check the db
|
||||
try:
|
||||
if not(arguments['create_db']):
|
||||
db_path, db_path_changed = checkSQLite3(db_path)
|
||||
except:
|
||||
raise
|
||||
|
||||
# _ _ _ ______ _
|
||||
# | | (_) | | | ___ \ (_)
|
||||
# | | _ ___| |_ | |_/ / __ _ ___ _ ___
|
||||
# | | | / __| __| | ___ \/ _` / __| / __|
|
||||
# | |___| \__ \ |_ | |_/ / (_| \__ \ \__ \
|
||||
# \_____/_|___/\__| \____/ \__,_|___/_|___/
|
||||
|
||||
# _ _ _ ______ _
|
||||
#| | (_) | | | ___ \ (_)
|
||||
#| | _ ___| |_ | |_/ / __ _ ___ _ ___
|
||||
#| | | / __| __| | ___ \/ _` / __| / __|
|
||||
#| |___| \__ \ |_ | |_/ / (_| \__ \ \__ \
|
||||
#\_____/_|___/\__| \____/ \__,_|___/_|___/
|
||||
#
|
||||
if arguments["list_basis"]:
|
||||
e = EMSL_local(db_path=db_path)
|
||||
|
||||
elts = arguments["--atom"]
|
||||
l = e.get_list_basis_available(elts)
|
||||
l = e.list_basis_available(arguments["--atom"],
|
||||
arguments["--basis"],
|
||||
arguments["--average_mo_number"])
|
||||
|
||||
for name, des in l:
|
||||
print name, "|", des
|
||||
if arguments["--average_mo_number"]:
|
||||
for name, des, avg in l:
|
||||
print "- '{}' ({}) || {:<50}".format(name, avg, des)
|
||||
else:
|
||||
for name, des in l:
|
||||
print "- '{}' || {:<50}".format(name, des)
|
||||
|
||||
# _ _ _ _____ _ _
|
||||
#| | (_) | | | ___| | | |
|
||||
#| | _ ___| |_ | |__ | | ___ _ __ ___ ___ _ __ | |_ ___
|
||||
#| | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
|
||||
#| |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \
|
||||
#\_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
|
||||
if arguments["list_atoms"]:
|
||||
# _ _ _ _____ _ _
|
||||
# | | (_) | | | ___| | | |
|
||||
# | | _ ___| |_ | |__ | | ___ _ __ ___ ___ _ __ | |_ ___
|
||||
# | | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
|
||||
# | |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \
|
||||
# \_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
|
||||
elif arguments["list_atoms"]:
|
||||
e = EMSL_local(db_path=db_path)
|
||||
|
||||
basis_name = arguments["--basis"]
|
||||
l = e.get_list_element_available(basis_name)
|
||||
print ", ".join(l)
|
||||
|
||||
#______ _ _ _
|
||||
#| ___ \ (_) | | | |
|
||||
#| |_/ / __ _ ___ _ ___ __| | __ _| |_ __ _
|
||||
#| ___ \/ _` / __| / __| / _` |/ _` | __/ _` |
|
||||
#| |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
|
||||
#\____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_|
|
||||
if arguments["get_basis_data"]:
|
||||
# ______ _ _ _
|
||||
# | ___ \ (_) | | | |
|
||||
# | |_/ / __ _ ___ _ ___ __| | __ _| |_ __ _
|
||||
# | ___ \/ _` / __| / __| / _` |/ _` | __/ _` |
|
||||
# | |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
|
||||
# \____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_|
|
||||
elif arguments["get_basis_data"]:
|
||||
e = EMSL_local(db_path=db_path)
|
||||
basis_name = arguments["--basis"]
|
||||
basis_name = arguments["--basis"][0]
|
||||
elts = arguments["--atom"]
|
||||
|
||||
l = e.get_basis(basis_name, elts,arguments["--with_l"])
|
||||
str_ = "\n\n".join(l) + "\n"
|
||||
l_atom_basis = e.get_basis(basis_name, elts,
|
||||
arguments["--treat_l"],
|
||||
arguments["--check"])
|
||||
# Add separation between atoms, and a empty last line
|
||||
str_ = "\n\n".join(l_atom_basis) + "\n"
|
||||
|
||||
if arguments["--save"]:
|
||||
|
||||
if arguments["--path"]:
|
||||
path = arguments["--path"]
|
||||
else:
|
||||
# The default path is built from the basis name and the atom names
|
||||
path = "_".join([basis_name, ".".join(elts)])
|
||||
path = "/tmp/" + path + ".bs"
|
||||
|
||||
@ -105,32 +133,39 @@ if __name__ == '__main__':
|
||||
else:
|
||||
print str_
|
||||
|
||||
# _ _ _ __ _
|
||||
#| | (_) | | / _| | |
|
||||
#| | _ ___| |_ | |_ ___ _ __ _ __ ___ __ _| |_ ___
|
||||
#| | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __|
|
||||
#| |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \
|
||||
#\_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/
|
||||
if arguments["list_formats"]:
|
||||
for i in format_dict:
|
||||
# _ _ _ __ _
|
||||
# | | (_) | | / _| | |
|
||||
# | | _ ___| |_ | |_ ___ _ __ _ __ ___ __ _| |_ ___
|
||||
# | | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __|
|
||||
# | |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \
|
||||
# \_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/
|
||||
elif arguments["list_formats"]:
|
||||
e = EMSL_dump()
|
||||
for i in e.get_list_format():
|
||||
print i
|
||||
|
||||
# _____ _ _ _
|
||||
#/ __ \ | | | | |
|
||||
#| / \/_ __ ___ __ _| |_ ___ __| | |__
|
||||
#| | | '__/ _ \/ _` | __/ _ \ / _` | '_ \
|
||||
#| \__/\ | | __/ (_| | || __/ | (_| | |_) |
|
||||
# \____/_| \___|\__,_|\__\___| \__,_|_.__/
|
||||
if arguments["create_db"]:
|
||||
# _____ _ _ _
|
||||
# / __ \ | | | | |
|
||||
# | / \/_ __ ___ __ _| |_ ___ __| | |__
|
||||
# | | | '__/ _ \/ _` | __/ _ \ / _` | '_ \
|
||||
# | \__/\ | | __/ (_| | || __/ | (_| | |_) |
|
||||
# \____/_| \___|\__,_|\__\___| \__,_|_.__/
|
||||
elif arguments["create_db"]:
|
||||
db_path = arguments["--db_path"]
|
||||
format = arguments["--format"]
|
||||
if format not in format_dict:
|
||||
print "Format %s doesn't exist. Run list_formats to get the list of formats." % (format)
|
||||
sys.exit(1)
|
||||
|
||||
contraction = not arguments["--no-contraction"]
|
||||
|
||||
e = EMSL_dump(
|
||||
db_path=db_path,
|
||||
format=format_dict[format],
|
||||
contraction=contraction)
|
||||
e = EMSL_dump(db_path=db_path,
|
||||
format=format,
|
||||
contraction=contraction)
|
||||
e.new_db()
|
||||
|
||||
# _
|
||||
# / | _ _. ._ o ._ _
|
||||
# \_ | (/_ (_| | | | | | (_|
|
||||
# _|
|
||||
|
||||
# Clean up on exit
|
||||
if not(arguments['create_db']) and db_path_changed:
|
||||
os.system("rm -f /dev/shm/%d.db" % (os.getpid()))
|
||||
|
65
README.md
65
README.md
@ -4,28 +4,34 @@ EMSL_Basis_Set_Exchange_Local
|
||||
|
||||
Create a local copy of the famous [EMSL Basis Set Exchange](https://bse.pnl.gov/bse/portal) and use it easily with the API.
|
||||
|
||||
* Make a slight copy (40Mo Sqlite3 database) of the EMSL Basis Set Exchange website (One database for all the basis set of one format);
|
||||
* Make a lightweight copy (40 MB SQLite3 database) of the EMSL Basis Set Exchange website. Currently available formats are:
|
||||
* Gamess-us, Gaussian94 and NEWCHEM;
|
||||
* API for scripting;
|
||||
* Quick local access without delay;
|
||||
* Only need [Python](https://www.python.org/) and [Request](http://docs.python-requests.org/en/latest/) module.
|
||||
* Only need [Python](https://www.python.org/)
|
||||
|
||||
##Dependencies
|
||||
* Python >2.6
|
||||
* Request ```pip install requests``` (in a virtual env or with sudo)
|
||||
|
||||
###### Optional
|
||||
If you plan to download a database manually (instead of using the pre-existing one), you need:
|
||||
* [Request](http://docs.python-requests.org/en/latest/) python module. ```$pip install requests``` (do it in a virtual env or with sudo)
|
||||
|
||||
##Installation
|
||||
* Download the git (```$ git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
|
||||
* Done ! You can now, use ```EMSL_api.py```
|
||||
* Clone the git repository (```$git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
|
||||
* That's all! You can now use ```EMSL_api.py```
|
||||
|
||||
##Usage
|
||||
```
|
||||
EMSL Api.
|
||||
|
||||
Usage:
|
||||
EMSL_api.py list_basis [--atom=<atom_name>...]
|
||||
[--db_path=<db_path>]
|
||||
EMSL_api.py list_basis [--basis=<basis_name>...]
|
||||
[--atom=<atom_name>...]
|
||||
[--db_path=<db_path>]
|
||||
[--average_mo_number]
|
||||
EMSL_api.py list_atoms --basis=<basis_name>
|
||||
[--db_path=<db_path>]
|
||||
[--db_path=<db_path>]
|
||||
EMSL_api.py get_basis_data --basis=<basis_name>
|
||||
[--atom=<atom_name>...]
|
||||
[--db_path=<db_path>]
|
||||
@ -45,6 +51,12 @@ Options:
|
||||
|
||||
<db_path> is the path to the SQLite3 file containing the Basis sets.
|
||||
By default is $EMSL_API_ROOT/db/Gausian_uk.db
|
||||
|
||||
Example of use:
|
||||
./EMSL_api.py list_basis --atom Al --atom U
|
||||
./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
|
||||
./EMSL_api.py list_atoms --basis ANO-RCC
|
||||
./EMSL_api.py get_basis_data --basis 3-21++G*
|
||||
```
|
||||
##Demonstration
|
||||
|
||||
@ -53,14 +65,47 @@ By default is $EMSL_API_ROOT/db/Gausian_uk.db
|
||||
(For better quality, see the [Source](https://asciinema.org/api/asciicasts/15380))
|
||||
|
||||
##To do
|
||||
For now we can only parse Gaussian-US basis set type file. (Look at ```./src/EMSL_utility.py#EMSL_dump.basis_data_row_to_array```)
|
||||
For now we can only parse the `Gamess-us, Gaussian94 and NEWCHEM` basis set file formats (thanks to @mattbernst for Gaussian94 and NEWCHEM).
|
||||
|
||||
###I need more format!
|
||||
|
||||
It's really simple. Just read the short explanation below.
|
||||
|
||||
You just need to provide a function that splits the basis data, which contains all the atoms, into one entry per atom.
|
||||
|
||||
Something like this:
|
||||
```python
|
||||
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
|
||||
"""Parse the Gaussian94 basis data raw html to get a nice tuple.
|
||||
|
||||
The data-pairs item is actually expected to be a 2 item list:
|
||||
[symbol, data]
|
||||
|
||||
e.g. ["Ca", "#BASIS SET..."]
|
||||
|
||||
N.B.: Currently ignores ECP data!
|
||||
|
||||
@param data: raw HTML from BSE
|
||||
@type data : unicode
|
||||
@param name: basis set name
|
||||
@type name : str
|
||||
@param description: basis set description
|
||||
@type description : str
|
||||
@param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
|
||||
@type elements : list
|
||||
@return: (name, description, data-pairs)
|
||||
@rtype : tuple
|
||||
"""
|
||||
```
|
||||
|
||||
Then just add the function in `src.parser_handler.format_dict`. You are ready to go!
|
||||
|
||||
Feel free to fork/pull request.
|
||||
|
||||
##Disclaimer
|
||||
It is not an official API. Use it with moderation.
|
||||
|
||||
In papers where you use the basis sets obtained from the Basis Set Exchange please site this :
|
||||
In papers where you use the basis sets obtained from the Basis Set Exchange, please cite this:
|
||||
>The Role of Databases in Support of Computational Chemistry Calculations
|
||||
>
|
||||
>>--<cite>Feller, D.; J. Comp. Chem., 17(13), 1571-1586, 1996.</cite>
|
||||
|
Binary file not shown.
303
src/EMSL_dump.py
Normal file
303
src/EMSL_dump.py
Normal file
@ -0,0 +1,303 @@
|
||||
import os
|
||||
import sys
|
||||
import re
|
||||
import time
|
||||
import sqlite3
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
|
||||
def install_with_pip(name):
    """Interactively offer to install the package ``name`` with pip.

    The user is asked until the answer is ``y`` or ``n`` (case-insensitive).
    On ``n`` the function returns without doing anything. On ``y`` pip is
    run for ``name``; if pip cannot be started or reports a failure, a hint
    is printed and the process exits with status 1.
    """
    import subprocess

    # raw_input only exists on Python 2; fall back to input elsewhere.
    try:
        ask = raw_input
    except NameError:
        ask = input

    answers = {'y': True,
               'n': False}

    while True:
        choice = ask('Do you want to install it ? [Y/N]')
        try:
            ins = answers[choice.strip().lower()]
            break
        except KeyError:
            print("not a valid choice")

    if not ins:
        return

    # pip.main() is not a supported entry point (it was removed in pip 10):
    # run pip as a module of the current interpreter instead.
    try:
        status = subprocess.call(
            [sys.executable, "-m", "pip", "install", name])
    except OSError:
        status = 1

    if status != 0:
        print("You need pip")
        print("(http://pip.readthedocs.org/en/latest/installing.html)")
        sys.exit(1)
|
||||
|
||||
|
||||
class EMSL_dump:

    """
    Everything needed to download the EMSL Basis Set Exchange and save it
    locally in a SQLite3 database.
    """

    def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
        """Prepare a dump of the basis sets available in ``format``.

        db_path     -- SQLite3 file to create. Defaults to
                       ``<directory of this file>/../db/<format>.db``.
        format      -- basis set format name; it is validated by
                       ``src.parser_handler.check_format`` and selects the
                       parser returned by ``get_parser_function``.
        contraction -- sent, as a string, as the ``minimize`` parameter of
                       every download request.

        Raises ImportError when the ``requests`` package is missing and the
        user declines (or fails) to install it.
        """
        from src.parser_handler import get_parser_function
        from src.parser_handler import check_format

        self.format = check_format(format)
        self.parser = get_parser_function(self.format)

        if db_path:
            self.db_path = db_path
        else:
            # abspath first: dirname(__file__) is "" when the module is
            # loaded through a bare relative file name.
            head_path = os.path.dirname(os.path.abspath(__file__))
            self.db_path = os.path.join(head_path, "..", "db",
                                        "{0}.db".format(self.format))

        self.contraction = str(contraction)
        self.debug = False

        try:
            import requests
        except ImportError:
            print("You need the requests package")
            install_with_pip("requests")
            # Import again after the installation attempt; binding the name
            # in a `finally` clause raised NameError when the import failed.
            import requests

        self.requests = requests

    def get_list_format(self):
        """List all the formats known to ``src.parser_handler.parser_dict``"""
        from src.parser_handler import parser_dict
        return parser_dict.keys()

    def set_db_path(self, path):
        """Define the database path"""
        self.db_path = path

    def dwl_basis_list_raw(self):
        """Return the source code of the iframe that contains the list of
        the available basis sets.

        When ``self.debug`` is set, the page is cached (pickled) in
        ``db/cache`` and read back from there on the following calls.
        """

        print("Download all the name available in EMSL.")
        # No newline: "Done" is printed on the same line once finished.
        sys.stdout.write("It can take some time. ")
        sys.stdout.flush()

        url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
        if self.debug:
            try:
                import cPickle as pickle
            except ImportError:
                import pickle

            dbcache = 'db/cache'
            if not os.path.isfile(dbcache):
                page = self.requests.get(url).text
                with open(dbcache, 'wb') as cache:
                    pickle.dump(page, cache)
            else:
                with open(dbcache, 'rb') as cache:
                    page = pickle.load(cache)
        else:
            page = self.requests.get(url).text

        print("Done")
        return page

    def basis_list_raw_to_array(self, data_raw):
        """Parse the raw html basis set list and build an ordered dict with
        all the information needed to download the database:

        d[name] = [name, xml_path, description, list of the elements]

        Only the lines containing ``new basisSet(`` are used. The argument
        list of that call is evaluated as a tuple ``tup``:

        0 - path to xml file
        1 - basis set name
        2 - categorization: "dftcfit", "dftorb", "dftxfit", "diffuse",
            "ecporb","effective core potential", "orbital", "polarization",
            "rydberg", or "tight"
        3 - parameterized elements by symbol e.g. '[H, He, B, C, N, O, F, Ne]'
        4 - curation status; only 'published' is trustworthy
        5 - boolean: has ECP
        6 - boolean: has spin
        7 - last modified date
        8 - name of primary developer
        9 - name of contributor
        10 - human-readable summary/description of basis set
        """

        d = OrderedDict()

        for line in data_raw.split('\n'):

            if "new basisSet(" not in line:
                continue

            b = line.find("(")
            e = line.find(");")
            s = line[b + 1:e]

            # SECURITY NOTE(review): `s` comes from a downloaded web page
            # and is passed to eval(); a tampered page could execute
            # arbitrary code. Consider ast.literal_eval once the page
            # content is confirmed to hold only plain literals.
            tup = eval(s)

            xml_path = tup[0]

            # non-published (e.g. rejected) basis sets and ecp should be
            # ignored
            if tup[4] != "published" or "-ecp" in xml_path.lower():
                continue

            name = tup[1]
            # '[H, He]' -> ['H', 'He']: drop brackets, quotes and spaces
            elts = re.sub(r'[\[\]" ]', '', tup[3]).split(',')
            # collapse every run of whitespace of the description
            des = re.sub(r'\s+', ' ', tup[-1])

            d[name] = [name, xml_path, des, elts]

        return d

    #  _____                _
    # /  __ \              | |
    # | /  \/_ __ ___  __ _| |_ ___
    # | |   | '__/ _ \/ _` | __/ _ \
    # | \__/\ | |  __/ (_| | ||  __/
    #  \____/_|  \___|\__,_|\__\___|
    #
    def create_sql(self, dict_basis_list):
        """Create the database from scratch.

        ``dict_basis_list`` maps a basis name to
        ``[name, xml_path, description, elements]``. Every basis set is
        downloaded by a pool of worker threads, parsed with ``self.parser``
        and stored in the ``basis_tab`` / ``data_tab`` tables.

        The process exits with status 1 when ``self.db_path`` already
        exists. A basis set that cannot be downloaded or parsed after
        ``attemps_max`` tries is reported as "Fail" and skipped.
        """
        try:
            import Queue as queue
        except ImportError:
            import queue
        import threading

        if os.path.isfile(self.db_path):
            sys.stderr.write("FAILLURE:\n")
            sys.stderr.write("{0} file alredy exist. ".format(self.db_path))
            sys.stderr.write("Delete or remove it\n")
            sys.exit(1)

        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()

        c.execute('''CREATE TABLE basis_tab(
                            basis_id INTEGER PRIMARY KEY AUTOINCREMENT,
                                name text,
                         description text,
                                UNIQUE(name)
                  );''')

        c.execute('''CREATE TABLE data_tab(
                           basis_id INTEGER,
                                elt TEXT,
                               data TEXT,
                    FOREIGN KEY(basis_id)
                    REFERENCES basis_tab(basis_id)
                    );''')

        c.execute('''CREATE TABLE format_tab(format TEXT)''')
        c.execute('''INSERT INTO format_tab VALUES (?)''', [self.format])

        c.execute(''' CREATE VIEW output_tab AS
                        SELECT basis_id,
                               name,
                               description,
                               elt,
                               data
                        FROM   basis_tab
                NATURAL JOIN   data_tab
                    ''')
        conn.commit()

        num_worker_threads = 7
        attemps_max = 20

        q_in = queue.Queue(num_worker_threads)
        q_out = queue.Queue(num_worker_threads)

        def worker():
            """Get a job from q_in, download and parse it, and put the
            result in q_out. (name, des, None) is put when every attempt
            failed, so the consumer never waits for a missing result."""
            while True:
                name, path_xml, des, elts = q_in.get()

                url = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
                url += "/eventSubmit_doDownload/true"

                params = {'bsurl': path_xml, 'bsname': name,
                          'elts': " ".join(elts),
                          'format': self.format,
                          'minimize': self.contraction}

                result = (name, des, None)
                for _ in range(attemps_max):
                    # Everything is caught on purpose: the original used a
                    # bare `except`, and an exception escaping here would
                    # kill the thread and leave the consumer blocked.
                    try:
                        text = self.requests.get(url, params=params).text
                        result = self.parser(text, name, des, elts,
                                             self.debug)
                    except BaseException:
                        time.sleep(0.1)
                    else:
                        break

                q_out.put(result)
                q_in.task_done()

        def enqueue():
            """Feed every basis set description to the workers."""
            for job in dict_basis_list.values():
                q_in.put(job)

        t = threading.Thread(target=enqueue)
        t.daemon = True
        t.start()

        for _ in range(num_worker_threads):
            t = threading.Thread(target=worker)
            t.daemon = True
            t.start()

        nb_basis = len(dict_basis_list)

        for i in range(nb_basis):
            name, des, basis_data = q_out.get()
            q_out.task_done()

            str_indice = '{:>3}'.format(i + 1)
            str_ = '{0} / {1} | {2}'.format(str_indice, nb_basis, name)

            if basis_data is None:
                print("{0} Fail".format(str_))
                continue

            # The basis name and its data are stored in one transaction:
            # if the name is rejected, no data row may be attached to the
            # row id of a previous basis set.
            try:
                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                # A d d _ t h e _ b a s i s _ n a m e #
                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                cmd = "INSERT INTO basis_tab(name,description) VALUES (?,?)"
                c.execute(cmd, [name, des])

                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                # A d d _ t h e _ b a s i s _ d a t a #
                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                id_ = [c.lastrowid]
                cmd = "INSERT INTO data_tab(basis_id,elt,data) VALUES (?,?,?)"
                c.executemany(cmd, [id_ + list(k) for k in basis_data])
                conn.commit()
            except sqlite3.IntegrityError:
                conn.rollback()
                print("{0} Fail".format(str_))
            else:
                print(str_)

        q_in.join()
        conn.close()

    def new_db(self):
        """Create new_db from scratch"""

        _data = self.dwl_basis_list_raw()
        array_basis = self.basis_list_raw_to_array(_data)

        self.create_sql(array_basis)
|
317
src/EMSL_local.py
Executable file
317
src/EMSL_local.py
Executable file
@ -0,0 +1,317 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def checkSQLite3(db_path):
    """Check that db_path is a usable SQLite3 database.

    The path is expanded (``~``, environment variables) and made absolute,
    then the file must be readable, be a regular file, be at least 100
    bytes long (the size of a SQLite header) and start with the SQLite 3
    magic string.

    Some file systems (lustre) do not allow I/O on sqlite3 files. When a
    first query fails with sqlite3.OperationalError, the file is copied to
    ``/dev/shm/<pid>.db`` and the query is tried again on the copy.

    Returns (db_path, changed): the path to use, and True when that path is
    the temporary copy (which the caller has to remove on exit).

    Raises IOError when the file fails one of the checks; the reason is
    also printed on stderr. Any error of the second query is re-raised
    after the temporary copy has been removed.
    """
    import shutil

    db_path = os.path.expanduser(db_path)
    db_path = os.path.expandvars(db_path)
    db_path = os.path.abspath(db_path)

    if not os.access(db_path, os.R_OK):
        problem = "Db file %s is not readable"
    elif not os.path.isfile(db_path):
        problem = "Db file %s is not... a file!"
    elif os.path.getsize(db_path) < 100:
        # SQLite database file header is 100 bytes
        problem = "Db file %s is not a SQLite file!"
    else:
        with open(db_path, 'rb') as fd:
            header = fd.read(100)
        # The file is read as bytes, so compare with a bytes literal.
        if header[:16] != b'SQLite format 3\x00':
            problem = "Db file %s is not in SQLiteFormat3!"
        else:
            problem = None

    if problem is not None:
        message = problem % (db_path)
        sys.stderr.write(message + "\n")
        raise IOError(message)

    # Check if the file system allows I/O on sqlite3 (lustre)
    try:
        EMSL_local(db_path=db_path).list_basis_available()
    except sqlite3.OperationalError:
        sys.stderr.write("I/O Error for you file system\n")
        sys.stderr.write("Try some fixe\n")
    else:
        return db_path, False

    # If not, work on a copy in /dev/shm. shutil is used rather than a
    # shell command so that paths containing spaces are handled.
    new_db_path = "/dev/shm/%d.db" % (os.getpid())
    shutil.copy(db_path, new_db_path)

    # Try again to check
    try:
        EMSL_local(db_path=new_db_path).list_basis_available()
    except BaseException:
        sys.stderr.write("Sorry...\n")
        try:
            os.remove(new_db_path)
        except OSError:
            pass
        raise

    sys.stderr.write("Working !\n")
    return new_db_path, True
|
||||
|
||||
|
||||
def cond_sql_or(table_name, l_value, glob=False):
    """Build the SQL condition "table_name is one of l_value".

    table_name -- name of the column to test.
    l_value    -- values to accept.
    glob       -- compare with GLOB (wildcards) instead of "=".

    Returns a one-item list holding the condition, so that callers can
    keep doing ``" ".join(cond_sql_or(...))``. The condition is wrapped in
    parentheses: it can be combined with AND without the OR chain changing
    the meaning of the query. An empty l_value gives "(0)", a condition
    that is valid SQL and never true.

    Values are written as single-quoted SQL string literals, with embedded
    single quotes doubled.
    """

    opr = "GLOB" if glob else "="

    conditions = []
    for val in l_value:
        literal = "{0}".format(val).replace("'", "''")
        conditions.append("{0} {1} '{2}'".format(table_name, opr, literal))

    if not conditions:
        return ["(0)"]

    return ["({0})".format(" OR ".join(conditions))]
|
||||
|
||||
|
||||
def string_to_nb_mo(str_type):
    """Take a shell type string and return the number of orbitals counted
    for it.

    "S", "P", "D" and "SP" are read from a table; a single letter from "F"
    to "W" gives 2 * (ord(letter) - 66) + 1, i.e. 9 for "F", 11 for "G"...

    NOTE(review): the table counts 3/5/7 for S/P/D and the letters are
    taken in plain alphabetical order; confirm that this is the intended
    convention.

    Raises ValueError for any other string. (ValueError derives from
    BaseException, which this function used to raise directly.)
    """

    d = {"S": 3,
         "P": 5,
         "D": 7,
         "SP": 8}

    if str_type in d:
        return d[str_type]

    # ord("F") = 70 and ord("W") = 87
    if len(str_type) == 1 and 70 <= ord(str_type) <= 87:
        # "F" has to give 2 * 4 + 1, so the offset is 66
        return 2 * (ord(str_type) - 66) + 1

    raise ValueError("Unknown orbital type: {0!r}".format(str_type))
|
||||
|
||||
# _ __
|
||||
# |_ |\/| (_ | | _ _ _. |
|
||||
# |_ | | __) |_ |_ (_) (_ (_| |
|
||||
#
|
||||
class EMSL_local:

    """
    All the methods for using the EMSL db locally
    """

    def __init__(self, db_path=None):
        """Open the SQLite3 database db_path and read the basis set format
        stored in its format_tab table."""
        self.db_path = db_path

        self.conn = sqlite3.connect(self.db_path)
        self.c = self.conn.cursor()

        self.c.execute("SELECT * from format_tab")
        self.format = self.c.fetchone()[0]

    def list_basis_available(self,
                             elts=None,
                             basis=None,
                             average_mo_number=False):
        """
        Return all the basis sets that contain all the elts.

        elts  -- element symbols that the basis set must all provide
                 (no filter when empty or None).
        basis -- GLOB patterns; the basis name must match one of them
                 (no filter when empty or None).
        average_mo_number -- also compute, for each basis set, the average
                 per element of the values given by string_to_nb_mo.

        Returns a list of (name, description) tuples, or a list of
        [name, description, average as a string] when average_mo_number is
        set. The list is empty when no basis set matches.
        """
        elts = list(elts) if elts else []
        basis = list(basis) if basis else []

        # ~#~#~#~#~#~ #
        # F i l t e r #
        # ~#~#~#~#~#~ #

        if basis:
            # In parentheses, so that the OR chain cannot change the
            # meaning of the AND it is combined with below.
            filter_basis = "({0})".format(
                " OR ".join(["name GLOB ?"] * len(basis)))
        else:
            filter_basis = "(1)"

        columns = "name, description"
        if average_mo_number:
            columns += ", data"

        if not elts:
            # The data column only exists in the output_tab view.
            table = "output_tab" if average_mo_number else "basis_tab"
            cmd = """SELECT DISTINCT {0}
                     FROM {1}
                     WHERE {2}""".format(columns, table, filter_basis)
            params = basis

        else:

            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # G e t t i n g _ B a s i s I d #
            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #

            # One sub-query per element; their intersection is the set of
            # basis sets providing every requested element.
            str_ = """SELECT DISTINCT basis_id
                      FROM output_tab
                      WHERE elt=? AND {0}""".format(filter_basis)

            cmd_id = " INTERSECT ".join([str_] * len(elts)) + ";"
            params_id = []
            for elt in elts:
                params_id.append(elt)
                params_id.extend(basis)

            self.c.execute(cmd_id, params_id)
            l_basis_id = [i[0] for i in self.c.fetchall()]

            # No basis set provides all the elements: an empty id list
            # used to produce an invalid "AND ()" query.
            if not l_basis_id:
                return []

            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # C r e a t e _ t h e _ c m d #
            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #

            # The ids are integers read from the database: writing them in
            # the query keeps the number of bound parameters small.
            cmd = """SELECT DISTINCT {0}
                     FROM output_tab
                     WHERE elt IN ({1}) AND basis_id IN ({2})
                     ORDER BY name""".format(
                columns,
                ",".join("?" * len(elts)),
                ",".join(str(int(i)) for i in l_basis_id))
            params = elts

        # ~#~#~#~#~ #
        # F e t c h #
        # ~#~#~#~#~ #

        self.c.execute(cmd, params)
        info = self.c.fetchall()

        if not average_mo_number:
            return [i[:] for i in info]

        # ~#~#~#~#~#~#~ #
        # P a r s i n g #
        # ~#~#~#~#~#~#~ #

        from collections import OrderedDict
        from src.parser_handler import get_symmetry_function

        f_symmetry = get_symmetry_function(self.format)

        # Description : dict_info[name] = [description, nb_mo, nb_ele]
        dict_info = OrderedDict()

        for name, description, atom_basis in info:

            nb_mo = 0
            for type_, _, _ in f_symmetry(atom_basis.split("\n")):
                nb_mo += string_to_nb_mo(type_)

            try:
                dict_info[name][1] += nb_mo
                dict_info[name][2] += 1.
            except KeyError:
                dict_info[name] = [description, nb_mo, 1.]

        # ~#~#~#~#~#~ #
        # R e t u r n #
        # ~#~#~#~#~#~ #

        return [[k, v[0], str(v[1] / v[2])] for k, v in dict_info.items()]

    def get_list_element_available(self, basis_name):
        """Return the elements available in the basis set basis_name.

        basis_name is compared case-insensitively. It may be given as a
        string or as a one-item list (the form in which the command line
        parser delivers it).
        """
        if not isinstance(basis_name, (list, tuple)):
            # A bare string would be taken as a sequence of one-character
            # parameters by execute().
            basis_name = [basis_name]

        str_ = """SELECT DISTINCT elt
                  FROM output_tab
                  WHERE name=(?) COLLATE NOCASE"""

        self.c.execute(str_, basis_name)

        return [str(i[0]) for i in self.c.fetchall()]

    def get_basis(self,
                  basis_name, elts=None,
                  handle_l_format=False, check_format=None):
        """
        Return the data from the basis set

        basis_name      -- exact name of the basis set.
        elts            -- elements to return (all of them when empty).
        handle_l_format -- pass the result through the function returned by
                           src.parser_handler.get_handle_l_function for the
                           format of this database.
        check_format    -- name given to
                           src.parser.check_validity.get_check_function; the
                           function it returns is called on every shell
                           type, and the process exits with status 1 when
                           it raises.

        Returns the list of the (stripped) data strings, one per element.
        """

        # ~#~#~#~#~#~ #
        # F i l t e r #
        # ~#~#~#~#~#~ #

        cmd = "SELECT DISTINCT data from output_tab WHERE name=?"
        params = [basis_name]

        if elts:
            cmd += " AND elt IN ({0})".format(",".join("?" * len(elts)))
            params.extend(elts)

        self.c.execute(cmd, params)

        # We need to take i[0] because fetchall return a tuple [(value),...]
        l_atom_basis = [i[0].strip() for i in self.c.fetchall()]

        # ~#~#~#~#~#~#~#~ #
        # h a n d l e _ f #
        # ~#~#~#~#~#~#~#~ #
        if handle_l_format:
            from src.parser_handler import get_handle_l_function
            f = get_handle_l_function(self.format)
            l_atom_basis = f(l_atom_basis)

        # ~#~#~#~#~ #
        # C h e c k #
        # ~#~#~#~#~ #

        if check_format:

            from src.parser_handler import get_symmetry_function
            from src.parser.check_validity import get_check_function

            f = get_check_function(check_format)
            f_symmetry = get_symmetry_function(self.format)

            for atom_basis in l_atom_basis:
                lines = atom_basis.split("\n")
                for type_, _, _ in f_symmetry(lines):
                    try:
                        f(type_)
                    except AssertionError:
                        print("False. You have somme special function like SP")
                        sys.exit(1)
                    # NOTE(review): kept as BaseException because the check
                    # functions are not visible here and may raise it
                    # directly; narrow to Exception once confirmed.
                    except BaseException:
                        print("Fail !")
                        sys.exit(1)

        # ~#~#~#~#~#~ #
        # R e t u r n #
        # ~#~#~#~#~#~ #
        return l_atom_basis
|
||||
# Small manual demonstration; it expects an "EMSL.db" database in the
# current directory.
if __name__ == "__main__":

    e = EMSL_local(db_path="EMSL.db")

    # The method is named list_basis_available; get_list_basis_available
    # does not exist on EMSL_local.
    l = e.list_basis_available()
    for i in l:
        print(i)

    # The name is given as a one-item list, i.e. as a sequence of SQL
    # parameters.
    l = e.get_list_element_available(["pc-0"])
    print(l)

    l = e.get_basis("cc-pVTZ", ["H", "He"])
    for i in l:
        print(i)
|
@ -1,435 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import sqlite3
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
import time
|
||||
|
||||
debug = True

# The element table is looked up next to the launched script. abspath is
# needed because dirname(sys.argv[0]) is "" when the script is started by
# its bare name, which turned the path into "/src/elts_abrev.dat".
elt_path = os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])),
                        "src", "elts_abrev.dat")

with open(elt_path, "r") as f:
    data = f.readlines()

# dict_ele maps the second "-"-separated field of each line to the third
# one, both stripped and lower-cased.
dict_ele = dict()
for i in data:
    l = i.split("-")
    # Skip the lines (e.g. a blank last line) that do not hold 3 fields.
    if len(l) < 3:
        continue
    dict_ele[l[1].strip().lower()] = l[2].strip().lower()
|
||||
|
||||
|
||||
def install_with_pip(name):
    """Interactively offer to install the package ``name`` with pip.

    The user is asked until the answer is ``y`` or ``n`` (case-insensitive).
    On ``n`` the function returns without doing anything. On ``y`` pip is
    run for ``name``; if pip cannot be started or reports a failure, a hint
    is printed and the process exits with status 1.
    """
    import subprocess

    # raw_input only exists on Python 2; fall back to input elsewhere.
    try:
        ask = raw_input
    except NameError:
        ask = input

    answers = {'y': True,
               'n': False}

    while True:
        choice = ask('Do you want to install it ? [Y/N]')
        try:
            ins = answers[choice.strip().lower()]
            break
        except KeyError:
            print("not a valid choice")

    if not ins:
        return

    # pip.main() is not a supported entry point (it was removed in pip 10):
    # run pip as a module of the current interpreter instead.
    try:
        status = subprocess.call(
            [sys.executable, "-m", "pip", "install", name])
    except OSError:
        status = 1

    if status != 0:
        print("You need pip, (http://pip.readthedocs.org/en/latest/installing.html)")
        sys.exit(1)
|
||||
|
||||
|
||||
def cond_sql_or(table_name, l_value):
    """Build the SQL condition "table_name is one of l_value".

    Returns a list holding the parenthesised OR chain, or an empty list
    when l_value is empty. Values are written as single-quoted SQL string
    literals, with embedded single quotes doubled.
    """

    l = []
    dmy = " OR ".join(["%s = '%s'" % (table_name, ("%s" % (i,)).replace("'", "''"))
                       for i in l_value])
    if dmy:
        l.append("(%s)" % dmy)

    return l
|
||||
|
||||
|
||||
class EMSL_dump:
    """Download the EMSL Basis Set Exchange content into a SQLite file.

    new_db() drives the whole process: download the page listing the basis
    sets, parse it, then download every basis set with a pool of worker
    threads and insert one row per (basis set, element) in the
    ``all_value`` table of the database located at ``db_path``.
    """

    def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
        """Store the settings and make sure ``requests`` can be imported.

        db_path     -- path of the SQLite file used by create_sql()
        format      -- sent as the ``format`` parameter of the download URL
        contraction -- sent, as a string, as the ``minimize`` parameter
        """
        self.db_path = db_path
        self.format = format
        self.contraction = str(contraction)

        try:
            import requests
        except ImportError:
            print("You need the requests package")
            install_with_pip("requests")
            # Try again now that the user had the chance to install it.  If
            # it is still missing, the ImportError is raised here instead of
            # the UnboundLocalError the former ``finally`` clause produced.
            import requests

        self.requests = requests

    def set_db_path(self, path):
        """Define the database path"""
        self.db_path = path

    def dwl_basis_list_raw(self):
        """Download the source code of the iframe which contains the list of
        the basis sets available.

        When the module-level ``debug`` flag is true the page is pickled to
        ``db/cache`` on first download and read back from there afterwards.
        """
        sys.stdout.write(
            "Download all the name available in EMSL. It can take some time. ")
        sys.stdout.flush()

        url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
        if debug:
            try:
                import cPickle as pickle  # Python 2
            except ImportError:
                import pickle  # Python 3
            dbcache = 'db/cache'
            if not os.path.isfile(dbcache):
                page = self.requests.get(url).text
                with open(dbcache, 'wb') as cache_file:
                    pickle.dump(page, cache_file)
            else:
                # NOTE(review): unpickling executes whatever the file
                # contains; db/cache must only ever be written by this
                # program.
                with open(dbcache, 'rb') as cache_file:
                    page = pickle.load(cache_file)
        else:
            page = self.requests.get(url).text

        print("Done")
        return page

    def bl_raw_to_array(self, data_raw):
        """Parse the raw html to create a basis set array with all the info.

        Every line containing ``new basisSet(`` is turned into
        ``[name, url, description, elements]``; entries whose url contains
        ``-ecp`` are skipped.  The result is sorted by name and holds one
        entry per name (a later line with the same name replaces the
        earlier one).
        """
        # Characters to strip from the element list before splitting it.
        junkers = re.compile(r'[\[\]" ]')

        d = {}
        for line in data_raw.split('\n'):
            if "new basisSet(" not in line:
                continue

            b = line.find("(")
            e = line.find(");")
            s = line[b + 1:e]

            # SECURITY(review): eval() runs text downloaded from the web as
            # Python code.  Kept because the exact argument syntax of the
            # page is not visible here; replace with a literal-only parser
            # once confirmed.
            tup = eval(s)
            url = tup[0]
            name = tup[1]
            elts = junkers.sub('', tup[3]).split(',')
            des = tup[-1]

            if "-ecp" in url.lower():
                continue
            # Keyed by name: trick for the unicity of the name.
            d[name] = [name, url, des, elts]

        array_sort = sorted(d.values(), key=lambda x: x[0])
        print("{0} basisset will be download".format(len(array_sort)))

        return array_sort

    def create_url(self, url, name, elts):
        """Create the adequate url to get the basis data"""
        elts_string = " ".join(elts)

        path = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
        path += "/eventSubmit_doDownload/true"
        path += "?bsurl=" + url
        path += "&bsname=" + name
        path += "&elts=" + elts_string
        path += "&format=" + self.format
        path += "&minimize=" + self.contraction
        return path

    def basis_data_row_to_array(self, data, name, des, elts):
        """Parse the basis data raw html to get a nice list of tuples.

        Return one ``(name, des, elt, data_of_elt)`` tuple per element.
        Raise Exception when the ``$DATA`` ... ``$END`` section is missing
        or empty, when a block still contains a ``$`` or when the element
        name found in a block does not match the expected element.
        """
        b = data.find("$DATA")
        e = data.find("$END")
        if b == -1 or e == -1 or data.find("$DATA$END") != -1:
            if debug:
                print(data)
            raise Exception("WARNING not DATA")

        # Cut the section out BEFORE replacing: the replacements change the
        # length of the text, which used to invalidate the two offsets.
        body = data[b + 5:e - 1]
        body = body.replace("PHOSPHOROUS", "PHOSPHORUS")
        body = body.replace("D+", "E+")
        body = body.replace("D-", "E-")

        d = []
        for (elt, data_elt) in zip(elts, body.split('\n\n')):

            elt_long_th = dict_ele[elt.lower()]
            elt_long_exp = data_elt.split()[0].lower()

            if "$" in data_elt:
                sys.stdout.write("Eror ")
                raise Exception("WARNING not bad split")

            if elt_long_th == elt_long_exp:
                d.append((name, des, elt, data_elt.strip()))
            else:
                print("th {0}".format(elt_long_th))
                print("exp {0}".format(elt_long_exp))
                print("abv {0}".format(elt))
                raise Exception("WARNING not good ELEMENT")

        return d

    def create_sql(self, list_basis_array):
        """Create the sql from the list of basis available data.

        list_basis_array -- list of ``[name, url, description, elements]``

        The basis sets are downloaded by 7 worker threads; each download is
        attempted up to 20 times.  A basis set that never yields valid data
        is reported as failed and skipped, so the loop below cannot wait
        forever for a result that a dead worker will never send.
        """
        try:
            import Queue as queue  # Python 2
        except ImportError:
            import queue  # Python 3
        import threading

        num_worker_threads = 7
        attemps_max = 20

        q_in = queue.Queue(num_worker_threads)
        q_out = queue.Queue(num_worker_threads)

        def worker():
            """get a Job from the q_in, do stuff, when finish put it in the q_out"""
            while True:
                name, url, des, elts = q_in.get()
                url = self.create_url(url, name, elts)

                basis_data = None  # stays None when every attempt fails
                for _ in range(attemps_max):
                    try:
                        text = self.requests.get(url).text
                        basis_data = self.basis_data_row_to_array(
                            text, name, des, elts)
                        break
                    except Exception:
                        time.sleep(0.1)

                q_out.put(([name, url, des, elts], basis_data))
                q_in.task_done()

        def enqueue():
            """Feed q_in; blocks whenever the bounded queue is full."""
            for name, url, des, elts in list_basis_array:
                q_in.put([name, url, des, elts])

        conn = sqlite3.connect(self.db_path)
        try:
            c = conn.cursor()

            # Create table
            c.execute('''CREATE TABLE all_value
                     (name text, description text, elt text, data text)''')

            feeder = threading.Thread(target=enqueue)
            feeder.daemon = True
            feeder.start()

            for _ in range(num_worker_threads):
                t = threading.Thread(target=worker)
                t.daemon = True
                t.start()

            nb_basis = len(list_basis_array)

            for i in range(nb_basis):
                [name, url, des, elts], basis_data = q_out.get()
                progress = "{:>3} / {} {}".format(i + 1, nb_basis, name)

                if basis_data is None:
                    print("{0} fail (no valid data after {1} attempts)".format(
                        progress, attemps_max))
                    continue

                try:
                    c.executemany(
                        "INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
                    conn.commit()
                    print(progress)
                except Exception:
                    print("{0} fail     {1}".format(progress, [url, des, elts]))
                    raise
        finally:
            conn.close()

        q_in.join()

    def new_db(self):
        """Create new_db from scratch"""
        _data = self.dwl_basis_list_raw()
        array_basis = self.bl_raw_to_array(_data)
        del _data

        self.create_sql(array_basis)
|
||||
|
||||
|
||||
class EMSL_local:
    """Query a local SQLite file whose ``all_value`` table has the columns
    ``name``, ``description``, ``elt`` and ``data``."""

    def __init__(self, db_path=None):
        """db_path -- path of the SQLite file to read"""
        self.db_path = db_path

    def get_list_basis_available(self, elts=None):
        """Return the distinct ``(name, description)`` pairs of the database.

        elts -- optional list of element symbols; when given, only the
                basis sets having a row for EVERY listed element are kept.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            c = conn.cursor()

            if not elts:
                c.execute("SELECT DISTINCT name,description from all_value")
            else:
                # One sub-query per element, intersected.
                cmd = " INTERSECT ".join(
                    ["SELECT name,description FROM all_value WHERE elt=?"]
                    * len(elts)) + ";"
                c.execute(cmd, list(elts))

            data = [row[:] for row in c.fetchall()]
        finally:
            conn.close()

        return data

    def get_list_element_available(self, basis_name):
        """Return the element symbols stored for ``basis_name``
        (the name is compared case-insensitively)."""
        conn = sqlite3.connect(self.db_path)
        try:
            c = conn.cursor()
            c.execute(
                "SELECT DISTINCT elt from all_value WHERE name=:name_us COLLATE NOCASE",
                {"name_us": basis_name})
            data = [str(row[0]) for row in c.fetchall()]
        finally:
            conn.close()

        return data

    def get_basis(self, basis_name, elts=None, with_l=False):
        """Return the stored data of ``basis_name``, one string per row.

        basis_name -- basis set name, compared case-insensitively
        elts       -- optional list of element symbols to restrict to
        with_l     -- when False (default) every ``L`` shell whose first
                      data line has four columns is split into an ``S``
                      shell and a ``P`` shell; when True data is returned
                      as stored
        """
        import re

        # A shell header is one word character, blanks, then a number.
        p = re.compile(r'^(\w)\s+\d+\b')

        def get_list_type(l_line):
            """Return [[type, begin, end], ...] for every shell header."""
            l = []
            for i, line in enumerate(l_line):
                m = p.search(line)
                if m:
                    if l:
                        l[-1].append(i)  # the previous shell ends here
                    l.append([m.group(1), i])
            if l:
                l[-1].append(len(l_line))
            return l

        def split_sp(header, rows):
            """Turn one combined shell into its S lines and its P lines."""
            body_s = []
            body_p = []
            for row in rows:
                a = row.split()
                common = "{:>3}".format(a[0])
                common += "{:>15.7f}".format(float(a[1]))
                body_s.append(common + "{:>23.7f}".format(float(a[2])))
                body_p.append(common + "{:>23.7f}".format(float(a[3])))
            return ([header.replace("L", "S")] + body_s +
                    [header.replace("L", "P")] + body_p)

        #  __
        # /__  _ _|_   _|_ ._ _  ._ _     _  _. |
        # \_| (/_ |_    |  | (_) | | |   _> (_| |
        #
        # Values are bound with placeholders: they come from the command
        # line and must never be pasted into the statement text.
        cmd = "SELECT DISTINCT data from all_value WHERE name=? COLLATE NOCASE"
        params = [basis_name]
        if elts:
            cmd += " AND elt IN (%s)" % ",".join("?" * len(elts))
            params.extend(elts)

        conn = sqlite3.connect(self.db_path)
        try:
            c = conn.cursor()
            c.execute(cmd, params)
            l_data_raw = c.fetchall()
        finally:
            conn.close()

        # |_|  _. ._   _| |  _    || | ||
        # | | (_| | | (_| | (/_      |_
        #
        l_data = []
        for data_raw in l_data_raw:

            l_line_raw = data_raw[0].strip().split("\n")
            l_line = [l_line_raw[0]]

            for symmetry, begin, end in get_list_type(l_line_raw):
                rows = l_line_raw[begin + 1:end]
                # Only a four-column L shell carries both an S and a P
                # coefficient; anything else is copied unchanged.
                is_sp = (symmetry == "L" and rows
                         and len(rows[0].split()) == 4)

                if not with_l and is_sp:
                    l_line += split_sp(l_line_raw[begin], rows)
                else:
                    l_line += l_line_raw[begin:end]

            l_data.append("\n".join(l_line))

        return l_data
|
||||
|
||||
|
||||
# Lower-case command-line spelling -> format name as written by EMSL.
# Most keys are simply the lower-cased EMSL name ...
format_dict = dict(
    (emsl_name.lower(), emsl_name)
    for emsl_name in ("GAMESS-US", "GAMESS-UK", "Turbomole", "TX93",
                      "Molpro", "MolproInt", "Hondo", "SuperMolecule",
                      "Molcas", "HyperChem", "Dalton", "deMon-KS",
                      "deMon2k"))
# ... and two formats are known under a shorter alias.
format_dict["g94"] = "Gaussian94"
format_dict["aces2"] = "AcesII"
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: dump a few queries against a local "EMSL.db".
    local_db = EMSL_local(db_path="EMSL.db")

    for basis_entry in local_db.get_list_basis_available():
        print(basis_entry)

    print(local_db.get_list_element_available("pc-0"))

    for atom_basis in local_db.get_basis("cc-pVTZ", ["H", "He"]):
        print(atom_basis)
|
0
src/misc/__init__.py
Normal file
0
src/misc/__init__.py
Normal file
@ -59,12 +59,20 @@ class Pattern(object):
|
||||
either = [list(child.children) for child in transform(self).children]
|
||||
for case in either:
|
||||
for e in [child for child in case if case.count(child) > 1]:
|
||||
if type(e) is Argument or type(e) is Option and e.argcount:
|
||||
if isinstance(
|
||||
e,
|
||||
Argument) or isinstance(
|
||||
e,
|
||||
Option) and e.argcount:
|
||||
if e.value is None:
|
||||
e.value = []
|
||||
elif type(e.value) is not list:
|
||||
elif not isinstance(e.value, list):
|
||||
e.value = e.value.split()
|
||||
if type(e) is Command or type(e) is Option and e.argcount == 0:
|
||||
if isinstance(
|
||||
e,
|
||||
Command) or isinstance(
|
||||
e,
|
||||
Option) and e.argcount == 0:
|
||||
e.value = 0
|
||||
return self
|
||||
|
||||
@ -84,10 +92,10 @@ def transform(pattern):
|
||||
if any(t in map(type, children) for t in parents):
|
||||
child = [c for c in children if type(c) in parents][0]
|
||||
children.remove(child)
|
||||
if type(child) is Either:
|
||||
if isinstance(child, Either):
|
||||
for c in child.children:
|
||||
groups.append([c] + children)
|
||||
elif type(child) is OneOrMore:
|
||||
elif isinstance(child, OneOrMore):
|
||||
groups.append(child.children * 2 + children)
|
||||
else:
|
||||
groups.append(child.children + children)
|
||||
@ -117,10 +125,10 @@ class LeafPattern(Pattern):
|
||||
left_ = left[:pos] + left[pos + 1:]
|
||||
same_name = [a for a in collected if a.name == self.name]
|
||||
if type(self.value) in (int, list):
|
||||
if type(self.value) is int:
|
||||
if isinstance(self.value, int):
|
||||
increment = 1
|
||||
else:
|
||||
increment = ([match.value] if type(match.value) is str
|
||||
increment = ([match.value] if isinstance(match.value, str)
|
||||
else match.value)
|
||||
if not same_name:
|
||||
match.value = increment
|
||||
@ -151,7 +159,7 @@ class Argument(LeafPattern):
|
||||
|
||||
def single_match(self, left):
|
||||
for n, pattern in enumerate(left):
|
||||
if type(pattern) is Argument:
|
||||
if isinstance(pattern, Argument):
|
||||
return n, Argument(self.name, pattern.value)
|
||||
return None, None
|
||||
|
||||
@ -169,7 +177,7 @@ class Command(Argument):
|
||||
|
||||
def single_match(self, left):
|
||||
for n, pattern in enumerate(left):
|
||||
if type(pattern) is Argument:
|
||||
if isinstance(pattern, Argument):
|
||||
if pattern.value == self.name:
|
||||
return n, Command(self.name, True)
|
||||
else:
|
0
src/parser/__init__.py
Normal file
0
src/parser/__init__.py
Normal file
52
src/parser/check_validity.py
Normal file
52
src/parser/check_validity.py
Normal file
@ -0,0 +1,52 @@
|
||||
# _
|
||||
# / |_ _ _ | _. | o _| o _|_
|
||||
# \_ | | (/_ (_ |< \/ (_| | | (_| | |_ \/
|
||||
# /
|
||||
# Do this after the L special-case treatment.
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
def check_gamess(str_type):
    """Check that the orbital type is handled by GAMESS.

    str_type -- orbital type label, e.g. "S", "P", "D"

    Return True for every one-character label.  Raise AssertionError for
    any other length (for instance the combined "SP" label).  The error is
    raised explicitly, not with an ``assert`` statement, so the check is
    still performed when Python runs with ``-O``.
    """
    if len(str_type) != 1:
        raise AssertionError(
            "Orbital type {0!r} is not a single letter".format(str_type))

    # Every single-letter type is accepted; the former test for "SP" after
    # the length check could never be reached.
    return True
|
||||
|
||||
|
||||
def check_NWChem(str_type):
    """Check that the orbital type is handled by NWChem.

    str_type -- orbital type label, e.g. "S", "P", "D"

    Return True for "S", "P", "D" and for any other one-character label
    that does not sort after "I".  Raise ValueError for a one-character
    label sorting after "I" (this covers "K", "L" and "M"), and
    AssertionError when the label is not exactly one character long.  The
    length check is an explicit raise so it survives ``python -O``.
    """
    if len(str_type) != 1:
        raise AssertionError(
            "Orbital type {0!r} is not a single letter".format(str_type))

    if str_type in ("S", "P", "D"):
        return True

    if str_type > "I":
        # ValueError is still caught by callers using ``except
        # BaseException`` but, unlike a bare BaseException, it carries a
        # message and is not mistaken for an interpreter-exit signal.
        raise ValueError(
            "Orbital type {0!r} is not handled by NWChem".format(str_type))

    return True
|
||||
|
||||
|
||||
# Program name -> function checking one orbital type for that program.
d_check = {
    "GAMESS-US": check_gamess,
    "NWChem": check_NWChem,
}


def get_check_function(name_program):
    """
    Return the check function registered in d_check for name_program.
    When there is none, explain it on stderr and exit with status 1.
    """
    if name_program in d_check:
        return d_check[name_program]

    str_ = "You need to add a check funtion for your program {0}"
    sys.stderr.write(str_.format(name_program) + "\n")
    sys.stderr.write(
        "This one are avalaible {0}".format(d_check.keys()) + "\n")
    sys.exit(1)
|
138
src/parser/gamess_us.py
Normal file
138
src/parser/gamess_us.py
Normal file
@ -0,0 +1,138 @@
|
||||
# __
|
||||
# /__ _. ._ _ _ _ _ _
|
||||
# \_| (_| | | | (/_ _> _> |_| _>
|
||||
#
|
||||
|
||||
from src.parser_handler import get_dict_ele
|
||||
import re
|
||||
|
||||
|
||||
def parse_basis_data_gamess_us(data, name, des, elts, debug=False,
                               dict_ele=None):
    """Parse the basis data raw html of gamess-us to get a nice tuple.

    data     -- downloaded text; the basis lies between $DATA and $END,
                one blank-line-separated block per element
    name     -- basis set name, returned unchanged
    des      -- basis set description, returned unchanged
    elts     -- element symbols, in the order of the blocks
    debug    -- print details about the data that is rejected
    dict_ele -- optional mapping {lower-cased symbol: lower-cased element
                name}; get_dict_ele() is called when it is not given, so a
                caller parsing many basis sets can load the table once

    Return (name, description, [[ele, data_ele],...]).
    Raise Exception when the $DATA/$END section is missing or empty, when
    a block still contains a "$", or when the element name heading a block
    is not the one expected for the symbol.
    """
    b = data.find("$DATA")
    e = data.find("$END")
    if b == -1 or e == -1 or data.find("$DATA$END") != -1:
        if debug:
            print(data)
        raise Exception("WARNING not DATA")

    # Cut the section out BEFORE replacing anything: "PHOSPHOROUS" ->
    # "PHOSPHORUS" shortens the text, so replacing first made the two
    # offsets above point past the intended end.
    body = data[b + 5:e - 1]
    for old, new in (("PHOSPHOROUS", "PHOSPHORUS"),
                     ("D+", "E+"),   # Fortran exponent -> Python exponent
                     ("D-", "E-")):
        body = body.replace(old, new)

    if dict_ele is None:
        dict_ele = get_dict_ele()

    basis_data = []
    for (elt, data_elt) in zip(elts, body.split('\n\n')):

        elt_long_th = dict_ele[elt.lower()]
        elt_long_exp = data_elt.split()[0].lower()

        if "$" in data_elt:
            if debug:
                print("Eror")
            raise Exception("WARNING bad split")

        if elt_long_th == elt_long_exp:
            basis_data.append([elt, data_elt.strip()])
        else:
            if debug:
                print("th {0}".format(elt_long_th))
                print("exp {0}".format(elt_long_exp))
                print("abv {0}".format(elt))
            raise Exception("WARNING not a good ELEMENT")

    return (name, des, basis_data)
|
||||
|
||||
|
||||
# A shell header: one word character, blanks, then a number ("S   3").
symmetry_regex = re.compile(r'^(\w)\s+\d+\b')


def l_symmetry_gamess_us(atom_basis):
    """
    Return the begin and the end of all the type of orbital
    input: atom_basis = [name, S 1, 12 0.12 12212, ...]
    output: [ [type, begin, end], ...]

    ``begin`` is the index of the shell header and ``end`` the index of
    the next header (or len(atom_basis) for the last shell).  An "L"
    header followed by a four-column line is reported as "SP".  An input
    without any header, including an empty list, gives [].
    """
    # Example
    # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]"

    nb_line = len(atom_basis)

    l = []
    for i, line in enumerate(atom_basis):
        # Optimisation for not searching all the time
        if len(line) >= 10:
            continue

        m = symmetry_regex.search(line)
        if not m:
            continue

        read_symmetry = m.group(1)

        # L is real L or special SP: just check the number of columns of
        # the first data line, if there is one (a header may be the very
        # last line).
        is_sp = (read_symmetry == "L"
                 and i + 1 < nb_line
                 and len(atom_basis[i + 1].split()) == 4)
        real_symmetry = "SP" if is_sp else read_symmetry

        if l:
            l[-1].append(i)  # the previous shell ends where this one starts
        l.append([real_symmetry, i])

    if l:
        l[-1].append(nb_line)
    return l
|
||||
|
||||
|
||||
def handle_l_gamess_us(l_atom_basis):
    """
    Read l_atom_basis and replace every SP shell by an S shell followed
    by a P shell.  Return the list of rewritten atom basis strings.
    """
    l_data = []

    for atom_basis in l_atom_basis:
        raw_lines = atom_basis.split("\n")
        # The first line contains the name of the atom.
        new_lines = raw_lines[:1]

        for symmetry, begin, end in l_symmetry_gamess_us(raw_lines):

            if symmetry != "SP":
                new_lines.extend(raw_lines[begin:end])
                continue

            s_rows = []
            p_rows = []
            for row in raw_lines[begin + 1:end]:
                # one L line => one S line and one P line sharing the
                # index and the exponent
                fields = row.split()
                shared = "{:>3}".format(fields[0])
                shared += "{:>15.7f}".format(float(fields[1]))

                s_rows.append(shared + "{:>23.7f}".format(float(fields[2])))
                p_rows.append(shared + "{:>23.7f}".format(float(fields[3])))

            header = raw_lines[begin]
            new_lines.append(header.replace("L", "S"))
            new_lines.extend(s_rows)
            new_lines.append(header.replace("L", "P"))
            new_lines.extend(p_rows)

        l_data.append("\n".join(new_lines))

    return l_data
|
83
src/parser/gaussian94.py
Normal file
83
src/parser/gaussian94.py
Normal file
@ -0,0 +1,83 @@
|
||||
# __ _
|
||||
# /__ _. _ _ o _. ._ (_| |_|_
|
||||
# \_| (_| |_| _> _> | (_| | | | |
|
||||
#
|
||||
import sys
|
||||
|
||||
|
||||
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
    """Parse the Gaussian94 basis data raw html to get a nice tuple.

    The data-pairs item is actually expected to be a 2 item list:
    [symbol, data]

    e.g. ["Ca", "#BASIS SET..."]

    N.B.: Currently ignores ECP data!

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: print the raw data / duplicate warnings when True
    @type debug : bool
    @return: (name, description, data-pairs)
    @rtype : tuple
    @raise ValueError: when data contains no **** marker
    """

    # Each basis set block starts and ends with ****. Find the region
    # containing all the basis blocks using the first and last ****.
    mark = "****"
    begin = data.find(mark)
    end = data.rfind(mark)

    if begin == -1 or end == -1:
        if debug:
            print(data)
        str_ = " No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # ``end`` already is the offset where the last marker starts; removing
    # len(mark) once more cut the last four characters of the final block.
    trimmed = data[begin + len(mark):end].strip()

    # group lines of data delimited by mark into per-element chunks
    chunks = []
    lines = []
    for line in trimmed.split("\n"):
        if line.startswith(mark):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another basis set block
    if lines:
        chunks.append(lines)

    # join lines back into solid text blocks, removing the marker lines
    # and dropping blocks left without any content
    chunks = ["\n".join([L for L in c if mark not in L]) for c in chunks]
    chunks = [chunk for chunk in chunks if chunk.strip()]

    # check each block for element and assign symbols to final pairs
    pairs = []
    unused_elements = set([e.upper() for e in elements])
    for chunk in chunks:
        # get first 3 chars of first line in block
        symbol = chunk.split("\n")[0][:3].strip()
        try:
            unused_elements.remove(symbol.upper())
        except KeyError:
            if debug:
                msg = "Warning: already processed {0}\n".format(symbol)
                sys.stderr.write(msg)
        pairs.append([symbol, chunk])

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}".format(
            list(unused_elements),
            name)
        print(msg)

    return (name, description, pairs)
|
228
src/parser/nwchem.py
Normal file
228
src/parser/nwchem.py
Normal file
@ -0,0 +1,228 @@
|
||||
# _
|
||||
# |\ | / |_ _ ._ _
|
||||
# | \| \/\/ \_ | | (/_ | | |
|
||||
#
|
||||
import json
|
||||
|
||||
|
||||
def extract_basis_nwchem(data, name):
    """Extract atomic orbital, charge density fitting, or exchange
    correlation functional basis data from a text region passed in as
    data. The charge density fitting and exchange correlation functional
    basis set data are employed for density functional calculations.

    @param data: text region containing basis set data
    @type data : str
    @param name: name of basis type: "ao basis", "cd basis", or "xc basis"
    @type name : str
    @return: per-element basis set chunks ([] when the section is absent
             or empty)
    @rtype : list
    """

    begin_marker = """BASIS "{0}" PRINT""".format(name)
    end_marker = "END"

    # search for the basis set data begin marker
    # calling "upper" on data because original data has inconsistent
    # capitalization
    upper_data = data.upper()
    begin = upper_data.find(begin_marker.upper())

    # No basis data found
    if begin == -1:
        return []

    start = begin + len(begin_marker)
    end = upper_data.find(end_marker, start)
    if end == -1:
        # Unterminated section: take everything that is left.
        end = len(data)

    # ``end`` is where the end marker starts, so the region stops exactly
    # there; subtracting len(end_marker) cut the last characters of the
    # final chunk.
    trimmed = data[start:end].strip()
    if not trimmed:
        return []

    # group lines of data delimited by #BASIS SET... into per-element chunks
    chunks = []
    lines = []
    for line in trimmed.split("\n"):
        if line.upper().startswith("#BASIS SET"):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another #BASIS SET...
    if lines:
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
|
||||
|
||||
|
||||
def extract_ecp_nwchem(data):
    """Extract the effective core potential basis data from a text region
    passed in as data.

    @param data: text region containing ECP data
    @type data : str
    @return: per-element effective core potential chunks ([] when there
             is no terminated ECP section or when it is empty)
    @rtype : list
    """

    ecp_begin_mark = "ECP\n"
    ecp_end_mark = "END"

    upper_data = data.upper()
    ecp_begin = upper_data.find(ecp_begin_mark)

    # No ECP data, so return empty list
    if ecp_begin == -1:
        return []

    start = ecp_begin + len(ecp_begin_mark)
    ecp_end = upper_data.find(ecp_end_mark, start)
    if ecp_end == -1:
        return []

    # ``ecp_end`` is where the end marker starts, so the region stops
    # exactly there; subtracting len(ecp_end_mark) cut the last characters
    # of the final chunk.
    ecp_region = data[start:ecp_end].strip()
    if not ecp_region:
        return []

    # group lines of data delimited by XX nelec YY into chunks, e.g.
    # "Zn nelec 18" begins a zinc ECP
    chunks = []
    lines = []
    for line in ecp_region.split("\n"):
        if " nelec " in line.lower():
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another XX nelec YY..
    if lines:
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
|
||||
|
||||
|
||||
def unpack_nwchem_basis_block(data):
    """Decode the JSON text of a NWChem basis data block.

    @param data: a JSON of basis set data, perhaps containing many types
    @type data : str
    @return: unpacked data
    @rtype : dict
    """
    return json.loads(data)
|
||||
|
||||
|
||||
def parse_basis_data_nwchem(data, name, description, elements, debug=True):
    """Parse the NWChem basis data raw html to get a nice list.

    The data-pairs item is actually expected to be a 2 item list:
    [symbol, data]

    where data is the JSON text of a dict keyed by section name
    ("ao basis", "cd basis", "xc basis", "ecp").

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @return: [name, description, data-pairs]
    @rtype : list
    @raise ValueError: when no section holds data, or when a chunk has no
                       readable element symbol
    """

    def extract_symbol(txt):
        """Return the first word found in the first 3 characters of the
        first suitable non-comment line of txt."""
        for sline in txt.split("\n"):
            if sline.startswith("#"):
                continue
            words = sline[:3].strip().split()
            if words:
                return words[0]

        raise ValueError("Can't find element symbol in {0}".format(txt))

    unused_elements = set([e.upper() for e in elements])

    # (chunks, section name), in the order the sections are merged.
    groups = [(extract_basis_nwchem(data, "ao basis"), "ao basis"),
              (extract_basis_nwchem(data, "cd basis"), "cd basis"),
              (extract_basis_nwchem(data, "xc basis"), "xc basis"),
              (extract_ecp_nwchem(data), "ecp")]

    if not any([cgroup for cgroup, _ in groups]):
        str_ = "No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # Tag all used elements, whether from ordinary AO basis or ECP section
    for cgroup, _ in groups:
        for chunk in cgroup:
            unused_elements.discard(extract_symbol(chunk).upper())

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}"
        print(msg.format(list(unused_elements), name))

    # symbol -> (rank of first appearance, {section name: chunk})
    packed = {}
    for cgroup, gname in groups:
        for chunk in cgroup:
            symbol = extract_symbol(chunk)

            if symbol in packed:
                # Expand entry, e.g. add ecp data for Na after its ao basis
                idx, known = packed[symbol]
                known[gname] = chunk
                chunk_dict = known.copy()
            else:
                # Create fresh entry, e.g. add Na with initial ao basis
                idx = len(packed)
                chunk_dict = {gname: chunk}

            packed[symbol] = (idx, chunk_dict)

    # Assign (Symbol, Serialized) to final pairs, by rank of appearance
    pairs = []
    for idx, chunk in sorted(packed.values(), key=lambda entry: entry[0]):
        first_text = (chunk.get("ao basis")
                      or chunk.get("cd basis")
                      or chunk.get("xc basis")
                      or chunk.get("ecp"))
        pairs.append([extract_symbol(first_text), json.dumps(chunk)])

    return [name, description, pairs]
|
138
src/parser_handler.py
Normal file
138
src/parser_handler.py
Normal file
@ -0,0 +1,138 @@
|
||||
import sys
|
||||
import os
|
||||
import re
|
||||
|
||||
|
||||
def get_dict_ele():
    """Read src/misc/elts_abrev.dat and return it as a dictionary.

    The file lies under the directory of the launched script
    (``sys.argv[0]``).  Each line is split on "-": the lower-cased second
    field becomes the key and the lower-cased third field the value.
    Callers in this project index the result with a lower-cased element
    symbol and compare the value with an element name.  Lines with fewer
    than three fields (e.g. blank lines) are ignored.
    """
    # The absolute path is needed: os.path.dirname(sys.argv[0]) is "" when
    # the script is started by its bare name, and plain concatenation then
    # produced the absolute path "/src/misc/elts_abrev.dat".
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    elt_path = os.path.join(script_dir, "src", "misc", "elts_abrev.dat")

    dict_ele = dict()
    with open(elt_path, "r") as f:
        for line in f:
            fields = line.split("-")
            if len(fields) < 3:
                continue
            dict_ele[fields[1].strip().lower()] = fields[2].strip().lower()

    return dict_ele
|
||||
|
||||
# ______ _ _ _ _
|
||||
# | ___| | | | (_) | |
|
||||
# | |_ _ __ ___ _ __ ___ __ _| |_ __| |_ ___| |_
|
||||
# | _| '__/ _ \| '_ ` _ \ / _` | __| / _` | |/ __| __|
|
||||
# | | | | | (_) | | | | | | (_| | |_ | (_| | | (__| |_
|
||||
# \_| |_| \___/|_| |_| |_|\__,_|\__| \__,_|_|\___|\__|
|
||||
#
|
||||
from src.parser.gamess_us import parse_basis_data_gamess_us
|
||||
from src.parser.gaussian94 import parse_basis_data_gaussian94
|
||||
from src.parser.nwchem import parse_basis_data_nwchem
|
||||
|
||||
|
||||
# EMSL format name -> parser function.  Formats that EMSL offers but for
# which no parser has been written yet map to None.
parser_dict = dict.fromkeys(["GAMESS-UK",
                             "Turbomole",
                             "TX93",
                             "Molpro",
                             "MolproInt",
                             "Hondo",
                             "SuperMolecule",
                             "Molcas",
                             "HyperChem",
                             "Dalton",
                             "deMon-KS",
                             "deMon2k",
                             "AcesII"])
parser_dict["Gaussian94"] = parse_basis_data_gaussian94
parser_dict["GAMESS-US"] = parse_basis_data_gamess_us
parser_dict["NWChem"] = parse_basis_data_nwchem
|
||||
|
||||
|
||||
def check_format(format):
    """Return format when it is one of the keys of parser_dict; otherwise
    print the list of known formats on stderr and exit with status 1."""
    if format in parser_dict:
        return format

    str_ = ["This format ({0}) is not available in EMSL".format(format),
            "EMSL provide this list : {0}".format(parser_dict.keys())]
    sys.stderr.write("\n".join(str_) + "\n")
    sys.exit(1)
|
||||
|
||||
|
||||
def get_parser_function(format):
    """Return the parser function registered in parser_dict for format.

    When the format has no parser (its entry is None) or is not in
    parser_dict at all, the list of supported formats is printed on
    stderr and the program exits with status 1.
    """
    # .get(): an unknown format is reported like an unsupported one
    # instead of escaping as a raw KeyError.
    parser = parser_dict.get(format)
    if parser:
        return parser

    list_parser = [k for k, v in parser_dict.items() if v]

    str_ = ["We have no parser for this format {0}".format(format),
            "We only support {0}".format(list_parser),
            "Feel free to fork / send a pull request",
            "You just need to add a function like this one:",
            "'src.parser.gamess_us.parse_basis_data_gamess_us'"]
    sys.stderr.write("\n".join(str_) + "\n")
    sys.exit(1)
|
||||
|
||||
# _____ _ _ _ _
|
||||
# / ___| | | | (_) | |
|
||||
# \ `--. _ _ _ __ ___ _ __ ___ ___| |_ _ __ _ _ __| |_ ___| |_
|
||||
# `--. \ | | | '_ ` _ \| '_ ` _ \ / _ \ __| '__| | | | / _` | |/ __| __|
|
||||
# /\__/ / |_| | | | | | | | | | | | __/ |_| | | |_| | | (_| | | (__| |_
|
||||
# \____/ \__, |_| |_| |_|_| |_| |_|\___|\__|_| \__, | \__,_|_|\___|\__|
|
||||
# __/ | __/ |
|
||||
# |___/ |___/
|
||||
|
||||
"""
|
||||
Return the begin and the end of all the type of orbital
|
||||
input: atom_basis = [name, S 1, 12 0.12 12212, ...]
|
||||
output: [ [type, begin, end], ...]
|
||||
"""
|
||||
|
||||
from src.parser.gamess_us import l_symmetry_gamess_us
|
||||
|
||||
# Format name -> function locating the orbital shells of an atom basis.
symmetry_dict = {
    "GAMESS-US": l_symmetry_gamess_us,
}


def get_symmetry_function(format):
    """
    Return the function registered in symmetry_dict for format.
    Such a function takes atom_basis = [name, S 1, 12 0.12 12212, ...]
    and returns [ [type, begin, end], ...].
    When the format has no entry, explain it on stderr and exit with
    status 1.
    """
    if format in symmetry_dict:
        return symmetry_dict[format]

    sys.stderr.write("You need to add a function in symmetry_dict" + "\n")
    sys.stderr.write("for your format ({0})".format(format) + "\n")
    sys.exit(1)
|
||||
|
||||
# _ _ _ _ _ _ _ _ _ ______ _ _
|
||||
# | | | | | | | ( | ) | ( | ) | _ (_) | |
|
||||
# | |_| | __ _ _ __ __| | | ___ V V| | V V | | | |_ ___| |_
|
||||
# | _ |/ _` | '_ \ / _` | |/ _ \ | | | | | | |/ __| __|
|
||||
# | | | | (_| | | | | (_| | | __/ | |____ | |/ /| | (__| |_
|
||||
# \_| |_/\__,_|_| |_|\__,_|_|\___| \_____/ |___/ |_|\___|\__|
|
||||
|
||||
"""
|
||||
Transform the SP special function (created using get_symmetry_function) into S and P
|
||||
"""
|
||||
from src.parser.gamess_us import handle_l_gamess_us
|
||||
|
||||
# Format name -> function splitting the SP shells of a list of atom basis.
handle_l_dict = {
    "GAMESS-US": handle_l_gamess_us,
}


def get_handle_l_function(format):
    """
    Return the function registered in handle_l_dict for format, i.e. the
    one transforming the SP special function (created using
    get_symmetry_function) into S and P.
    When the format has no entry, explain it on stderr and exit with
    status 1.
    """
    if format in handle_l_dict:
        return handle_l_dict[format]

    sys.stderr.write("You need to add a function in handle_l_dict" + "\n")
    sys.stderr.write("for your format ({0})".format(format) + "\n")
    sys.exit(1)
|
Loading…
Reference in New Issue
Block a user