Merge pull request #3 from TApplencourt/master

Major change  ! Read descritpion
This commit is contained in:
Anthony Scemama 2015-03-18 17:54:44 +01:00
commit 839391ffe7
15 changed files with 1436 additions and 524 deletions

View File

@ -4,19 +4,22 @@
"""EMSL Api.
Usage:
EMSL_api.py list_basis [--atom=atom_name...]
[--db_path=db_path]
EMSL_api.py list_atoms --basis=basis_name
[--db_path=db_path]
EMSL_api.py get_basis_data --basis=basis_name
[--atom=atom_name...]
[--db_path=db_path]
[--with_l]
[(--save [--path=path])]
EMSL_api.py list_basis [--basis=<basis_name>...]
[--atom=<atom_name>...]
[--db_path=<db_path>]
[--average_mo_number]
EMSL_api.py list_atoms --basis=<basis_name>
[--db_path=<db_path>]
EMSL_api.py get_basis_data --basis=<basis_name>
[--atom=<atom_name>...]
[--db_path=<db_path>]
[(--save [--path=<path>])]
[--check=<program_name>]
[--treat_l]
EMSL_api.py list_formats
EMSL_api.py create_db --db_path=db_path
--format=format
[--no-contraction]
EMSL_api.py create_db --format=<format>
[--db_path=<db_path>]
[--no-contraction]
EMSL_api.py (-h | --help)
EMSL_api.py --version
@ -27,75 +30,100 @@ Options:
<db_path> is the path to the SQLite3 file containing the Basis sets.
By default is $EMSL_API_ROOT/db/Gausian_uk.db
Example of use:
./EMSL_api.py list_basis --atom Al --atom U
./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
./EMSL_api.py list_atoms --basis ANO-RCC
./EMSL_api.py get_basis_data --basis 3-21++G*
"""
version = "0.2.0"
version = "0.8.1"
import sys
import os
from src.docopt import docopt
from src.EMSL_utility import EMSL_dump
from src.EMSL_utility import format_dict
from src.EMSL_utility import EMSL_local
from src.misc.docopt import docopt
from src.EMSL_dump import EMSL_dump
from src.EMSL_local import EMSL_local, checkSQLite3
if __name__ == '__main__':
arguments = docopt(__doc__, version='EMSL Api ' + version)
# ___
# | ._ o _|_
# _|_ | | | |_
#
if arguments["--db_path"]:
db_path = arguments["--db_path"]
else:
import os
db_path = os.path.dirname(__file__) + "/db/Gamess-us.db"
db_path = os.path.dirname(__file__) + "/db/GAMESS-US.db"
# Check the db
try:
if not(arguments['create_db']):
db_path, db_path_changed = checkSQLite3(db_path)
except:
raise
# _ _ _ ______ _
# | | (_) | | | ___ \ (_)
# | | _ ___| |_ | |_/ / __ _ ___ _ ___
# | | | / __| __| | ___ \/ _` / __| / __|
# | |___| \__ \ |_ | |_/ / (_| \__ \ \__ \
# \_____/_|___/\__| \____/ \__,_|___/_|___/
# _ _ _ ______ _
#| | (_) | | | ___ \ (_)
#| | _ ___| |_ | |_/ / __ _ ___ _ ___
#| | | / __| __| | ___ \/ _` / __| / __|
#| |___| \__ \ |_ | |_/ / (_| \__ \ \__ \
#\_____/_|___/\__| \____/ \__,_|___/_|___/
#
if arguments["list_basis"]:
e = EMSL_local(db_path=db_path)
elts = arguments["--atom"]
l = e.get_list_basis_available(elts)
l = e.list_basis_available(arguments["--atom"],
arguments["--basis"],
arguments["--average_mo_number"])
for name, des in l:
print name, "|", des
if arguments["--average_mo_number"]:
for name, des, avg in l:
print "- '{}' ({}) || {:<50}".format(name, avg, des)
else:
for name, des in l:
print "- '{}' || {:<50}".format(name, des)
# _ _ _ _____ _ _
#| | (_) | | | ___| | | |
#| | _ ___| |_ | |__ | | ___ _ __ ___ ___ _ __ | |_ ___
#| | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
#| |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \
#\_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
if arguments["list_atoms"]:
# _ _ _ _____ _ _
# | | (_) | | | ___| | | |
# | | _ ___| |_ | |__ | | ___ _ __ ___ ___ _ __ | |_ ___
# | | | / __| __| | __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
# | |___| \__ \ |_ | |___| | __/ | | | | | __/ | | | |_\__ \
# \_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
elif arguments["list_atoms"]:
e = EMSL_local(db_path=db_path)
basis_name = arguments["--basis"]
l = e.get_list_element_available(basis_name)
print ", ".join(l)
#______ _ _ _
#| ___ \ (_) | | | |
#| |_/ / __ _ ___ _ ___ __| | __ _| |_ __ _
#| ___ \/ _` / __| / __| / _` |/ _` | __/ _` |
#| |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
#\____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_|
if arguments["get_basis_data"]:
# ______ _ _ _
# | ___ \ (_) | | | |
# | |_/ / __ _ ___ _ ___ __| | __ _| |_ __ _
# | ___ \/ _` / __| / __| / _` |/ _` | __/ _` |
# | |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
# \____/ \__,_|___/_|___/ \__,_|\__,_|\__\__,_|
elif arguments["get_basis_data"]:
e = EMSL_local(db_path=db_path)
basis_name = arguments["--basis"]
basis_name = arguments["--basis"][0]
elts = arguments["--atom"]
l = e.get_basis(basis_name, elts,arguments["--with_l"])
str_ = "\n\n".join(l) + "\n"
l_atom_basis = e.get_basis(basis_name, elts,
arguments["--treat_l"],
arguments["--check"])
# Add separation between atoms, and a empty last line
str_ = "\n\n".join(l_atom_basis) + "\n"
if arguments["--save"]:
if arguments["--path"]:
path = arguments["--path"]
else:
# The defaut path is bais
path = "_".join([basis_name, ".".join(elts)])
path = "/tmp/" + path + ".bs"
@ -105,32 +133,39 @@ if __name__ == '__main__':
else:
print str_
# _ _ _ __ _
#| | (_) | | / _| | |
#| | _ ___| |_ | |_ ___ _ __ _ __ ___ __ _| |_ ___
#| | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __|
#| |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \
#\_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/
if arguments["list_formats"]:
for i in format_dict:
# _ _ _ __ _
# | | (_) | | / _| | |
# | | _ ___| |_ | |_ ___ _ __ _ __ ___ __ _| |_ ___
# | | | / __| __| | _/ _ \| '__| '_ ` _ \ / _` | __/ __|
# | |___| \__ \ |_ | || (_) | | | | | | | | (_| | |_\__ \
# \_____/_|___/\__| |_| \___/|_| |_| |_| |_|\__,_|\__|___/
elif arguments["list_formats"]:
e = EMSL_dump()
for i in e.get_list_format():
print i
# _____ _ _ _
#/ __ \ | | | | |
#| / \/_ __ ___ __ _| |_ ___ __| | |__
#| | | '__/ _ \/ _` | __/ _ \ / _` | '_ \
#| \__/\ | | __/ (_| | || __/ | (_| | |_) |
# \____/_| \___|\__,_|\__\___| \__,_|_.__/
if arguments["create_db"]:
# _____ _ _ _
# / __ \ | | | | |
# | / \/_ __ ___ __ _| |_ ___ __| | |__
# | | | '__/ _ \/ _` | __/ _ \ / _` | '_ \
# | \__/\ | | __/ (_| | || __/ | (_| | |_) |
# \____/_| \___|\__,_|\__\___| \__,_|_.__/
elif arguments["create_db"]:
db_path = arguments["--db_path"]
format = arguments["--format"]
if format not in format_dict:
print "Format %s doesn't exist. Run list_formats to get the list of formats." % (format)
sys.exit(1)
contraction = not arguments["--no-contraction"]
e = EMSL_dump(
db_path=db_path,
format=format_dict[format],
contraction=contraction)
e = EMSL_dump(db_path=db_path,
format=format,
contraction=contraction)
e.new_db()
# _
# / | _ _. ._ o ._ _
# \_ | (/_ (_| | | | | | (_|
# _|
# Clean up on exit
if not(arguments['create_db']) and db_path_changed:
os.system("rm -f /dev/shm/%d.db" % (os.getpid()))

View File

@ -4,28 +4,34 @@ EMSL_Basis_Set_Exchange_Local
Create a local copy of the famous [EMSL Basis Set Exchange](https://bse.pnl.gov/bse/portal) and use it easily through the API.
* Make a slight copy (40Mo Sqlite3 database) of the EMSL Basis Set Exchange website (One database for all the basis set of one format);
* Make a lightweight copy (40 MB SQLite3 database) of the EMSL Basis Set Exchange website. Currently available formats are:
* Gamess-us, Gaussian94 and NEWCHEM;
* API for scripting;
* Quick local access without delay;
* Only need [Python](https://www.python.org/) and [Request](http://docs.python-requests.org/en/latest/) module.
* Only need [Python](https://www.python.org/)
##Dependencies
* Python >2.6
* Request ```pip install requests``` (in a virtual env or with sudo)
###### Optional
If you plan to download manually some database -not using the pre existing one- you need :
* [Request](http://docs.python-requests.org/en/latest/) python module. ```$pip install requests``` (do it in a virtual env or with sudo)
##Installation
* Download the git (```$ git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
* Done ! You can now, use ```EMSL_api.py```
* Download the git repository (```$git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
* That's all! You can now use ```EMSL_api.py```
##Usage
```
EMSL Api.
Usage:
EMSL_api.py list_basis [--atom=<atom_name>...]
[--db_path=<db_path>]
EMSL_api.py list_basis [--basis=<basis_name>...]
[--atom=<atom_name>...]
[--db_path=<db_path>]
[--average_mo_number]
EMSL_api.py list_atoms --basis=<basis_name>
[--db_path=<db_path>]
[--db_path=<db_path>]
EMSL_api.py get_basis_data --basis=<basis_name>
[--atom=<atom_name>...]
[--db_path=<db_path>]
@ -45,6 +51,12 @@ Options:
<db_path> is the path to the SQLite3 file containing the Basis sets.
By default is $EMSL_API_ROOT/db/Gausian_uk.db
Example of use:
./EMSL_api.py list_basis --atom Al --atom U
./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
./EMSL_api.py list_atoms --basis ANO-RCC
./EMSL_api.py get_basis_data --basis 3-21++G*
```
##Demonstration
@ -53,14 +65,47 @@ By default is $EMSL_API_ROOT/db/Gausian_uk.db
(For better quality, see the [Source](https://asciinema.org/api/asciicasts/15380))
##To do
For now we can only parse Gaussian-US basis set type file. (Look at ```./src/EMSL_utility.py#EMSL_dump.basis_data_row_to_array```)
For now we can only parse `Gamess-us, Gaussian94 and NEWCHEM` (Thanks to @mattbernst for Gaussian94 and NEWCHEM) basis set type file.
###I need more format!
It is really simple. Just read the short explanation below.
You only need to provide a function that splits the basis data, which contains all the atoms, into a tuple with one entry per atom.
Something like this:
```python
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
"""Parse the Gaussian94 basis data raw html to get a nice tuple.
The data-pairs item is actually expected to be a 2 item list:
[symbol, data]
e.g. ["Ca", "#BASIS SET..."]
N.B.: Currently ignores ECP data!
@param data: raw HTML from BSE
@type data : unicode
@param name: basis set name
@type name : str
@param des: basis set description
@type des : str
@param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
@type elements : list
@return: (name, description, data-pairs)
@rtype : tuple
"""
```
Then just add the function in `src.parser_handler.format_dict`. You are ready to go!
Feel free to fork/pull request.
##Disclaimer
It is not an official API. Use it with moderation.
In papers where you use the basis sets obtained from the Basis Set Exchange please site this :
In papers where you use the basis sets obtained from the Basis Set Exchange, please cite this:
>The Role of Databases in Support of Computational Chemistry Calculations
>
>>--<cite>Feller, D.; J. Comp. Chem., 17(13), 1571-1586, 1996.</cite>

Binary file not shown.

303
src/EMSL_dump.py Normal file
View File

@ -0,0 +1,303 @@
import os
import sys
import re
import time
import sqlite3
from collections import OrderedDict
def install_with_pip(name):
    """Ask the user whether `name` should be installed, then install it with pip.

    The question is repeated until the answer is 'y' or 'n' (case-insensitive).
    Nothing is done when the user answers 'n'.  When pip cannot be imported,
    or does not expose ``pip.main``, a hint is printed and the program exits
    with status 1.

    @param name: name of the package to install
    @type name : str
    """
    answers = {'y': True,
               'n': False}

    while True:
        choice = raw_input('Do you want to install it ? [Y/N]')
        try:
            ins = answers[choice.strip().lower()]
        except KeyError:
            # Only an unknown answer is a "not valid choice"; anything else
            # (e.g. Ctrl-C) must not be swallowed by the prompt loop.
            print("not a valid choice")
        else:
            break

    if not ins:
        return

    try:
        import pip
        pip.main(['install', name])
    except (ImportError, AttributeError):
        # ImportError: pip is not installed.
        # AttributeError: this pip does not provide the ``pip.main`` entry point.
        print("You need pip")
        print("(http://pip.readthedocs.org/en/latest/installing.html)")
        sys.exit(1)
class EMSL_dump:
    """
    Everything needed to download the EMSL Basis Set Exchange
    and save it locally in a SQLite3 file.
    """

    def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
        """Prepare a dump in the given format.

        @param db_path: path of the SQLite3 file to create; when empty,
                        ``<this dir>/../db/<format>.db`` is used
        @param format: format name, validated by
                       ``src.parser_handler.check_format``
        @param contraction: stored as ``str(contraction)`` and sent as the
                            'minimize' parameter of the download request
        @raise ImportError: if ``requests`` is missing and was not installed
        """
        from src.parser_handler import get_parser_function
        from src.parser_handler import check_format

        self.format = check_format(format)
        self.parser = get_parser_function(self.format)

        if db_path:
            self.db_path = db_path
        else:
            head_path = os.path.dirname(__file__)
            self.db_path = "{0}/../db/{1}.db".format(head_path, self.format)

        self.contraction = str(contraction)
        self.debug = False

        try:
            import requests
        except ImportError:
            print("You need the requests package")
            install_with_pip("requests")
            # Import again: either pip just installed the package, or the
            # user declined and a clear ImportError is raised here (instead
            # of an unbound local variable later on).
            import requests
        self.requests = requests

    def get_list_format(self):
        """List all the format available in EMSL"""
        from src.parser_handler import parser_dict
        return list(parser_dict.keys())

    def set_db_path(self, path):
        """Define the database path"""
        self.db_path = path

    def dwl_basis_list_raw(self):
        """Return the source code of the iframe
        which contains the list of the basis set available.

        When ``self.debug`` is set, the page is cached as a pickle in the
        relative file 'db/cache' and read back from it on later calls.
        """
        print("Download all the name available in EMSL.")
        # No newline here: "Done" is printed on the same line afterwards.
        sys.stdout.write("It can take some time. ")
        sys.stdout.flush()

        url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"

        if self.debug:
            try:
                import cPickle as pickle
            except ImportError:
                import pickle

            dbcache = 'db/cache'
            if not os.path.isfile(dbcache):
                page = self.requests.get(url).text
                # Binary mode + context manager: the handle is always closed
                # and the pickle is not subject to newline translation.
                with open(dbcache, 'wb') as cache:
                    pickle.dump(page, cache)
            else:
                with open(dbcache, 'rb') as cache:
                    page = pickle.load(cache)
        else:
            page = self.requests.get(url).text

        print("Done")
        return page

    def basis_list_raw_to_array(self, data_raw):
        """Parse the raw html basis set list to create a dict
        with all the information needed for downloading the database:

        Return d[name] = [name, xml_path, description,
                          list of the elements available]

        Explanation of tuple data from 'tup' by index:

        0 - path to xml file
        1 - basis set name
        2 - categorization: "dftcfit", "dftorb", "dftxfit", "diffuse",
            "ecporb","effective core potential", "orbital", "polarization",
            "rydberg", or "tight"
        3 - parameterized elements by symbol e.g. '[H, He, B, C, N, O, F, Ne]'
        4 - curation status; only 'published' is trustworthy
        5 - boolean: has ECP
        6 - boolean: has spin
        7 - last modified date
        8 - name of primary developer
        9 - name of contributor
        10 - human-readable summary/description of basis set
        """
        junk_chars = re.compile(r'[["\ \]]')
        blanks = re.compile(r'\s+')

        d = OrderedDict()

        for line in data_raw.split('\n'):
            if "new basisSet(" not in line:
                continue

            b = line.find("(")
            e = line.find(");")
            s = line[b + 1:e]

            # SECURITY NOTE(review): eval() runs text downloaded from the
            # web site as Python code. If the argument list only ever holds
            # plain literals, ast.literal_eval would be a safe replacement;
            # confirm against real data before switching.
            tup = eval(s)

            xml_path = tup[0]

            # non-published (e.g. rejected) basis sets and ecp should be
            # ignored
            if tup[4] != "published" or "-ecp" in xml_path.lower():
                continue

            name = tup[1]
            elts = junk_chars.sub('', tup[3]).split(',')
            des = blanks.sub(' ', tup[-1])

            d[name] = [name, xml_path, des, elts]

        return d

    # _____                _
    # /  __ \              | |
    # | /  \/_ __ ___  __ _| |_ ___
    # | |   | '__/ _ \/ _` | __/ _ \
    # | \__/\ | |  __/ (_| | ||  __/
    # \____/_|  \___|\__,_|\__\___|
    #
    def create_sql(self, dict_basis_list):
        """Create the sql file from scratch.
        Take the list of basis available data,
        download each basis and put it in the sql file.

        @param dict_basis_list: mapping whose values are
                                [name, xml_path, description, elements], as
                                returned by basis_list_raw_to_array

        Exits with status 1 when ``self.db_path`` already exists.
        """
        if os.path.isfile(self.db_path):
            sys.stderr.write("FAILLURE:\n")
            sys.stderr.write("{0} file alredy exist.".format(self.db_path))
            sys.stderr.write(" Delete or remove it\n")
            sys.exit(1)

        conn = sqlite3.connect(self.db_path)
        c = conn.cursor()

        c.execute('''CREATE TABLE basis_tab(
                            basis_id INTEGER PRIMARY KEY AUTOINCREMENT,
                                name text,
                         description text,
                                    UNIQUE(name)
                  );''')

        c.execute('''CREATE TABLE data_tab(
                           basis_id INTEGER,
                                elt TEXT,
                               data TEXT,
                    FOREIGN KEY(basis_id)
                    REFERENCES basis_tab(basis_id)
                    );''')

        c.execute('''CREATE TABLE format_tab(format TEXT)''')
        c.execute('''INSERT INTO format_tab VALUES (?)''', [self.format])
        conn.commit()

        c.execute(''' CREATE VIEW output_tab AS
                        SELECT basis_id,
                               name,
                               description,
                               elt,
                               data
                        FROM   basis_tab
                NATURAL JOIN   data_tab
                    ''')

        try:
            import Queue
        except ImportError:
            import queue as Queue
        import threading

        num_worker_threads = 7
        attemps_max = 20
        # Seconds; without a timeout a stalled connection blocks a worker,
        # and therefore the whole dump, forever.
        request_timeout = 60

        q_in = Queue.Queue(num_worker_threads)
        q_out = Queue.Queue(num_worker_threads)

        url = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
        url += "/eventSubmit_doDownload/true"

        def worker():
            """Get a job from q_in, download and parse it,
            then put the result in q_out."""
            while True:
                name, path_xml, des, elts = q_in.get()

                params = {'bsurl': path_xml, 'bsname': name,
                          'elts': " ".join(elts),
                          'format': self.format,
                          'minimize': self.contraction}

                # Reported as-is when every attempt fails, so the consumer
                # always receives exactly one item per job and never hangs.
                result = (name, des, None)

                for _ in range(attemps_max):
                    try:
                        text = self.requests.get(url, params=params,
                                                 timeout=request_timeout).text
                        result = self.parser(text, name, des, elts,
                                             self.debug)
                    except Exception:
                        time.sleep(0.1)
                    else:
                        break

                try:
                    q_out.put(result)
                finally:
                    q_in.task_done()

        def enqueue():
            for name, path_xml, des, elts in dict_basis_list.values():
                q_in.put([name, path_xml, des, elts])
            return 0

        t = threading.Thread(target=enqueue)
        t.daemon = True
        t.start()

        for i in range(num_worker_threads):
            t = threading.Thread(target=worker)
            t.daemon = True
            t.start()

        nb_basis = len(dict_basis_list)

        try:
            for i in range(nb_basis):
                name, des, basis_data = q_out.get()
                q_out.task_done()

                str_indice = '{:>3}'.format(i + 1)
                str_ = '{0} / {1} | {2}'.format(str_indice, nb_basis, name)

                if basis_data is None:
                    # The download or the parsing failed attemps_max times.
                    print("{0} Fail".format(str_))
                    continue

                try:
                    # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                    # A d d _ t h e _ b a s i s _ n a m e #
                    # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                    cmd = "INSERT INTO basis_tab(name,description) VALUES (?,?)"
                    c.execute(cmd, [name, des])
                    # Read the id right after the successful insert, so that
                    # a failed insert can never reuse a previous basis' id.
                    basis_id = c.lastrowid

                    # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                    # A d d _ t h e _ b a s i s _ d a t a #
                    # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
                    cmd = "INSERT INTO data_tab(basis_id,elt,data) VALUES (?,?,?)"
                    c.executemany(cmd,
                                  [[basis_id] + list(k) for k in basis_data])
                    conn.commit()
                except sqlite3.IntegrityError:
                    conn.rollback()
                    print("{0} Fail".format(str_))
                else:
                    print(str_)
        finally:
            conn.close()

        q_in.join()

    def new_db(self):
        """Create new_db from scratch"""
        _data = self.dwl_basis_list_raw()
        array_basis = self.basis_list_raw_to_array(_data)

        self.create_sql(array_basis)

317
src/EMSL_local.py Executable file
View File

@ -0,0 +1,317 @@
# -*- coding: utf-8 -*-
import sqlite3
import re
import sys
import os
def checkSQLite3(db_path):
    """Check that db_path is a usable SQLite3 database.

    The path is expanded (``~``, environment variables) and made absolute.
    If the file is valid but cannot be queried where it lives (the original
    author hit this on lustre), it is copied to ``/dev/shm/<pid>.db``
    and that copy is used instead.

    @param db_path: path to the SQLite3 file
    @type db_path : str
    @return: (path to use, True if the database was copied to /dev/shm)
    @rtype : tuple
    @raise IOError: the file is unreadable, not a regular file, too small,
                    or does not start with the SQLite 3 header
    """
    import shutil

    def fail(message):
        """Report the problem on stderr and abort the check."""
        sys.stderr.write(message + "\n")
        raise IOError(message)

    db_path = os.path.expanduser(db_path)
    db_path = os.path.expandvars(db_path)
    db_path = os.path.abspath(db_path)

    # Check if db file is readable
    if not os.access(db_path, os.R_OK):
        fail("Db file %s is not readable" % (db_path))

    if not os.path.isfile(db_path):
        fail("Db file %s is not... a file!" % (db_path))

    if os.path.getsize(db_path) < 100:  # SQLite database file header is 100 bytes
        fail("Db file %s is not a SQLite file!" % (db_path))

    with open(db_path, 'rb') as fd:
        header = fd.read(100)

    # The file is read in binary mode, so compare with a bytes literal.
    if header[:16] != b'SQLite format 3\x00':
        fail("Db file %s is not in SQLiteFormat3!" % (db_path))

    # Check if the file system allows I/O on sqlite3 (lustre)
    # If not, copy on /dev/shm and remove after opening
    try:
        EMSL_local(db_path=db_path).list_basis_available()
    except sqlite3.OperationalError:
        sys.stderr.write("I/O Error for you file system\n")
        sys.stderr.write("Try some fixe\n")
        new_db_path = "/dev/shm/%d.db" % (os.getpid())
        # shutil instead of a shell "cp": works with spaces and shell
        # metacharacters in the path, and reports a failed copy.
        shutil.copy(db_path, new_db_path)
        db_path = new_db_path
    else:
        return db_path, False

    # Try again to check
    try:
        EMSL_local(db_path=db_path).list_basis_available()
    except BaseException:
        sys.stderr.write("Sorry...\n")
        try:
            os.remove(db_path)
        except OSError:
            # Same best-effort semantics as "rm -f".
            pass
        raise
    else:
        sys.stderr.write("Working !\n")
        return db_path, True
def cond_sql_or(table_name, l_value, glob=False):
    """Build an SQL condition matching any of the given values.

    Each value is compared to `table_name` with ``=`` (or ``GLOB`` when
    `glob` is true) and the comparisons are joined with ``OR``.

    Values are written as single-quoted SQL string literals with embedded
    quotes doubled; double quotes denote identifiers in SQL and are only
    accepted as strings through an SQLite compatibility quirk.

    @param table_name: name of the column to test
    @param l_value: list of accepted values
    @param glob: use the GLOB operator instead of equality
    @return: a one-element list holding the condition; for an empty
             `l_value` the condition is "0" (always false), so it can be
             embedded safely in a larger WHERE clause
    @rtype : list
    """
    opr = "GLOB" if glob else "="

    conditions = []
    for val in l_value:
        literal = "{0}".format(val).replace("'", "''")
        conditions.append("{0} {1} '{2}'".format(table_name, opr, literal))

    if not conditions:
        # "x OR y OR ..." over nothing is false; an empty string would
        # produce invalid SQL such as "... AND ()".
        return ["0"]

    return [" OR ".join(conditions)]
def string_to_nb_mo(str_type):
    """Take a shell type string and return the number of orbitals counted for it.

    "S", "P", "D" and "SP" are looked up in a table; a single letter from
    "F" to "W" is computed as ``2 * (ord(letter) - 66) + 1``.

    NOTE(review): these values are 3 for S, 5 for P, 7 for D, 9 for F, i.e.
    two more than the usual 2l+1 count -- confirm that this is intended.

    @param str_type: shell type, e.g. "S", "SP" or "F"
    @type str_type : str
    @return: number of orbitals
    @rtype : int
    @raise ValueError: if the shell type is not recognised
    """
    d = {"S": 3,
         "P": 5,
         "D": 7,
         "SP": 8}

    if str_type in d:
        return d[str_type]

    # ord("F") = 70 and ord("W") = 87.
    # The length is checked first because ord() only accepts one character.
    if len(str_type) == 1 and 70 <= ord(str_type) <= 87:
        # The offset of 66 makes "F" count 2 * 4 + 1 = 9, continuing the table
        return 2 * (ord(str_type) - 66) + 1

    raise ValueError("Unknown shell type: {0!r}".format(str_type))
# _ __
# |_ |\/| (_ | | _ _ _. |
# |_ | | __) |_ |_ (_) (_ (_| |
#
class EMSL_local:
    """
    All the methods for using the EMSL db locally.

    The connection and the cursor opened by the constructor are kept in
    ``self.conn`` and ``self.c`` and reused by every query.
    """

    def __init__(self, db_path=None):
        """Open the database and read its format from format_tab.

        @param db_path: path to the SQLite3 file
        """
        self.db_path = db_path

        self.conn = sqlite3.connect(self.db_path)
        self.c = self.conn.cursor()

        self.c.execute("SELECT * from format_tab")
        self.format = self.c.fetchone()[0]

    def list_basis_available(self,
                             elts=None,
                             basis=None,
                             average_mo_number=False):
        """
        Return all the basis sets which contain all the elements of `elts`.

        @param elts: element symbols every returned basis must provide;
                     empty/None means no constraint
        @param basis: GLOB patterns on the basis name (e.g. 'cc-pV*'); a
                      basis is kept when it matches any of them;
                      empty/None means no constraint
        @param average_mo_number: also compute, for each basis, the average
                                  over the fetched elements of the number of
                                  orbitals given by string_to_nb_mo
        @return: list of (name, description) tuples, or of
                 [name, description, average as str] lists when
                 `average_mo_number` is set
        """
        elts = list(elts) if elts else []
        basis = list(basis) if basis else []

        # ~#~#~#~#~#~ #
        # F i l t e r #
        # ~#~#~#~#~#~ #

        # The OR chain is parenthesised: it is combined with AND below and
        # AND binds tighter than OR in SQL.
        if basis:
            filter_basis = "({0})".format(
                " OR ".join(["name GLOB ?"] * len(basis)))
        else:
            filter_basis = "(1)"

        column_to_fetch = "name, description"
        if average_mo_number:
            column_to_fetch += ", data"

        if not elts:
            # basis_tab has no data column: the view is needed for the average
            table = "output_tab" if average_mo_number else "basis_tab"
            cmd = """SELECT DISTINCT {0}
                     FROM {1}
                     WHERE {2}""".format(column_to_fetch, table, filter_basis)
            params = basis
        else:
            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # G e t t i n g _ B a s i s I d #
            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # One SELECT per element, intersected: only the basis sets
            # providing every requested element survive.
            select_id = """SELECT DISTINCT basis_id
                           FROM output_tab
                           WHERE elt=? AND {0}""".format(filter_basis)

            cmd_id = " INTERSECT ".join([select_id] * len(elts))
            params_id = []
            for elt in elts:
                params_id.append(elt)
                params_id.extend(basis)

            self.c.execute(cmd_id, params_id)
            l_basis_id = [row[0] for row in self.c.fetchall()]

            if not l_basis_id:
                # Nothing matches; an empty id list would also give
                # invalid SQL below.
                return []

            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # C r e a t e _ t h e _ c m d #
            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
            # The ids are integers read from the database itself, so they
            # are inlined; this keeps the number of bound parameters small.
            filter_id = "basis_id IN ({0})".format(
                ",".join(str(int(i)) for i in l_basis_id))
            filter_ele = "elt IN ({0})".format(",".join("?" * len(elts)))

            cmd = """SELECT DISTINCT {0}
                     FROM output_tab
                     WHERE ({1}) AND ({2})
                     ORDER BY name""".format(column_to_fetch,
                                             filter_ele,
                                             filter_id)
            params = elts

        # ~#~#~#~#~ #
        # F e t c h #
        # ~#~#~#~#~ #
        self.c.execute(cmd, params)
        info = self.c.fetchall()

        if not average_mo_number:
            return list(info)

        # ~#~#~#~#~#~#~ #
        # P a r s i n g #
        # ~#~#~#~#~#~#~ #
        from collections import OrderedDict
        from src.parser_handler import get_symmetry_function

        f_symmetry = get_symmetry_function(self.format)

        # Description : dict_info[name] = [description, nb_mo, nb_ele]
        dict_info = OrderedDict()

        for name, description, atom_basis in info:
            nb_mo = 0
            for type_, _, _ in f_symmetry(atom_basis.split("\n")):
                nb_mo += string_to_nb_mo(type_)

            if name in dict_info:
                dict_info[name][1] += nb_mo
                dict_info[name][2] += 1.
            else:
                dict_info[name] = [description, nb_mo, 1.]

        # ~#~#~#~#~#~ #
        # R e t u r n #
        # ~#~#~#~#~#~ #
        return [[k, v[0], str(v[1] / v[2])] for k, v in dict_info.items()]

    def get_list_element_available(self, basis_name):
        """
        Return the elements available in a basis set.

        @param basis_name: basis set name (compared case-insensitively);
                           either a string or a one-element list/tuple
        @return: list of element symbols
        @rtype : list
        """
        # ~#~#~#~#~#~ #
        # F i l t e r #
        # ~#~#~#~#~#~ #
        str_ = """SELECT DISTINCT elt
                  FROM output_tab
                  WHERE name=(?) COLLATE NOCASE"""

        # sqlite3 expects a sequence of parameters: a bare string would be
        # bound character by character.
        if isinstance(basis_name, (list, tuple)):
            params = list(basis_name)
        else:
            params = [basis_name]

        # ~#~#~#~#~ #
        # F e t c h #
        # ~#~#~#~#~ #
        self.c.execute(str_, params)

        # ~#~#~#~#~#~ #
        # R e t u r n #
        # ~#~#~#~#~#~ #
        return [str(row[0]) for row in self.c.fetchall()]

    def get_basis(self,
                  basis_name, elts=None,
                  handle_l_format=False, check_format=None):
        """
        Return the data from the basis set.

        @param basis_name: exact basis set name
        @param elts: elements to fetch; empty/None means all of them
        @param handle_l_format: post-process the data with the function
                                returned by
                                ``src.parser_handler.get_handle_l_function``
        @param check_format: name passed to
                             ``src.parser.check_validity.get_check_function``;
                             the program exits with status 1 when the check
                             fails for a function type of the basis
        @return: one stripped data string per element
        @rtype : list
        """
        # ~#~#~#~#~#~ #
        # F i l t e r #
        # ~#~#~#~#~#~ #
        # Bound parameters: names holding quotes cannot break the query.
        cmd = "SELECT DISTINCT data from output_tab WHERE name=?"
        params = [basis_name]

        if elts:
            cmd += " AND elt IN ({0})".format(",".join("?" * len(elts)))
            params.extend(elts)

        self.c.execute(cmd, params)

        # We need to take row[0] because fetchall return a list of tuples
        l_atom_basis = [row[0].strip() for row in self.c.fetchall()]

        # ~#~#~#~#~#~#~#~ #
        # h a n d l e _ f #
        # ~#~#~#~#~#~#~#~ #
        if handle_l_format:
            from src.parser_handler import get_handle_l_function
            f = get_handle_l_function(self.format)
            l_atom_basis = f(l_atom_basis)

        # ~#~#~#~#~ #
        # C h e c k #
        # ~#~#~#~#~ #
        if check_format:
            from src.parser_handler import get_symmetry_function
            from src.parser.check_validity import get_check_function

            f = get_check_function(check_format)
            f_symmetry = get_symmetry_function(self.format)

            for atom_basis in l_atom_basis:
                lines = atom_basis.split("\n")
                for type_, _, _ in f_symmetry(lines):
                    try:
                        f(type_)
                    except AssertionError:
                        print("False. You have somme special function like SP")
                        sys.exit(1)
                    except Exception:
                        print("Fail !")
                        sys.exit(1)

        # ~#~#~#~#~#~ #
        # R e t u r n #
        # ~#~#~#~#~#~ #
        return l_atom_basis
if __name__ == "__main__":
    # Manual smoke test; expects a database file named "EMSL.db" in the
    # current directory.
    e = EMSL_local(db_path="EMSL.db")

    # The method is named list_basis_available on this class.
    for i in e.list_basis_available():
        print(i)

    # The basis name is given as a one-element list because it is used as
    # the sequence of SQL parameters.
    l = e.get_list_element_available(["pc-0"])
    print(l)

    l = e.get_basis("cc-pVTZ", ["H", "He"])
    for i in l:
        print(i)

View File

@ -1,435 +0,0 @@
# -*- coding: utf-8 -*-
import sqlite3
import re
import sys
import os
import time
debug = True
elt_path = os.path.dirname(sys.argv[0]) + "/src/elts_abrev.dat"
with open(elt_path, "r") as f:
data = f.readlines()
dict_ele = dict()
for i in data:
l = i.split("-")
dict_ele[l[1].strip().lower()] = l[2].strip().lower()
def install_with_pip(name):
ins = False
d = {'y': True,
'n': False}
while True:
choice = raw_input('Do you want to install it ? [Y/N]')
try:
ins = d[choice.lower()]
break
except:
print "not a valid choice"
if ins:
try:
import pip
pip.main(['install', name])
except:
print "You need pip, (http://pip.readthedocs.org/en/latest/installing.html)"
sys.exit(1)
def cond_sql_or(table_name, l_value):
l = []
dmy = " OR ".join(['%s = "%s"' % (table_name, i) for i in l_value])
if dmy:
l.append("(%s)" % dmy)
return l
class EMSL_dump:
def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
self.db_path = db_path
self.format = format
self.contraction = str(contraction)
try:
import requests
except:
print "You need the requests package"
install_with_pip("requests")
finally:
self.requests = requests
def set_db_path(self, path):
"""Define the database path"""
self.db_path = path
def dwl_basis_list_raw(self):
print "Download all the name available in EMSL. It can take some time.",
sys.stdout.flush()
"""Download the source code of the iframe who contains the list of the basis set available"""
url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
if debug:
import cPickle as pickle
dbcache = 'db/cache'
if not os.path.isfile(dbcache):
page = self.requests.get(url).text
file = open(dbcache, 'w')
pickle.dump(page, file)
else:
file = open(dbcache, 'r')
page = pickle.load(file)
file.close()
else:
page = self.requests.get(url).text
print "Done"
return page
def bl_raw_to_array(self, data_raw):
"""Parse the raw html to create a basis set array whith all the info:
url, name,description"""
d = {}
for line in data_raw.split('\n'):
if "new basisSet(" in line:
b = line.find("(")
e = line.find(");")
s = line[b + 1:e]
tup = eval(s)
url = tup[0]
name = tup[1]
junkers = re.compile('[[" \]]')
elts = junkers.sub('', tup[3]).split(',')
des = tup[-1]
if "-ecp" in url.lower():
continue
d[name] = [name, url, des, elts]
"""Tric for the unicity of the name"""
array = [d[key] for key in d]
array_sort = sorted(array, key=lambda x: x[0])
print len(array_sort), "basisset will be download"
return array_sort
def create_url(self, url, name, elts):
"""Create the adequate url to get the basis data"""
elts_string = " ".join(elts)
path = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
path += "/eventSubmit_doDownload/true"
path += "?bsurl=" + url
path += "&bsname=" + name
path += "&elts=" + elts_string
path += "&format=" + self.format
path += "&minimize=" + self.contraction
return path
def basis_data_row_to_array(self, data, name, des, elts):
"""Parse the basis data raw html to get a nice tuple"""
d = []
b = data.find("$DATA")
e = data.find("$END")
if (b == -1 or data.find("$DATA$END") != -1):
if debug:
print data
raise Exception("WARNING not DATA")
else:
data = data.replace("PHOSPHOROUS", "PHOSPHORUS")
data = data.replace("D+", "E+")
data = data.replace("D-", "E-")
data = data[b + 5:e - 1].split('\n\n')
for (elt, data_elt) in zip(elts, data):
elt_long_th = dict_ele[elt.lower()]
elt_long_exp = data_elt.split()[0].lower()
if "$" in data_elt:
print "Eror",
raise Exception("WARNING not bad split")
if elt_long_th == elt_long_exp:
d.append((name, des, elt, data_elt.strip()))
else:
print "th", elt_long_th
print "exp", elt_long_exp
print "abv", elt
raise Exception("WARNING not good ELEMENT")
return d
def create_sql(self, list_basis_array):
"""Create the sql from the list of basis available data"""
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
# Create table
c.execute('''CREATE TABLE all_value
(name text, description text, elt text, data text)''')
import Queue
import threading
num_worker_threads = 7
attemps_max = 20
q_in = Queue.Queue(num_worker_threads)
q_out = Queue.Queue(num_worker_threads)
def worker():
"""get a Job from the q_in, do stuff, when finish put it in the q_out"""
while True:
[name, url, des, elts] = q_in.get()
url = self.create_url(url, name, elts)
attemps = 0
while attemps < attemps_max:
text = self.requests.get(url).text
try:
basis_data = self.basis_data_row_to_array(
text, name, des, elts)
break
except:
time.sleep(0.1)
attemps += 1
try:
q_out.put(([name, url, des, elts], basis_data))
q_in.task_done()
except:
print name, url, des
raise
def enqueue():
for [name, url, des, elts] in list_basis_array:
q_in.put(([name, url, des, elts]))
return 0
t = threading.Thread(target=enqueue)
t.daemon = True
t.start()
for i in range(num_worker_threads):
t = threading.Thread(target=worker)
t.daemon = True
t.start()
nb_basis = len(list_basis_array)
for i in range(nb_basis):
[name, url, des, elts], basis_data = q_out.get()
try:
c.executemany(
"INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
conn.commit()
print '{:>3}'.format(i + 1), "/", nb_basis, name
except:
print '{:>3}'.format(i + 1), "/", nb_basis, name, "fail",
print ' ', [url, des, elts]
raise
conn.close()
q_in.join()
def new_db(self):
"""Create new_db from scratch"""
_data = self.dwl_basis_list_raw()
array_basis = self.bl_raw_to_array(_data)
del _data
self.create_sql(array_basis)
class EMSL_local:
def __init__(self, db_path=None):
self.db_path = db_path
def get_list_basis_available(self, elts=[]):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
if not elts:
c.execute("SELECT DISTINCT name,description from all_value")
data = c.fetchall()
else:
cmd = [
"SELECT name,description FROM all_value WHERE elt=?"] * len(elts)
cmd = " INTERSECT ".join(cmd) + ";"
c.execute(cmd, elts)
data = c.fetchall()
data = [i[:] for i in data]
conn.close()
return data
def get_list_element_available(self, basis_name):
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
c.execute(
"SELECT DISTINCT elt from all_value WHERE name=:name_us COLLATE NOCASE", {
"name_us": basis_name})
data = c.fetchall()
data = [str(i[0]) for i in data]
conn.close()
return data
def get_basis(self, basis_name, elts=None, with_l=False):
def get_list_type(l_line):
l = []
for i, line in enumerate(l_line):
m = re.search(p, line)
if m:
l.append([m.group(1), i])
try:
l[-2].append(i)
except IndexError:
pass
l[-1].append(i + 1)
return l
import re
# __ _
# /__ _ _|_ _|_ ._ _ ._ _ _ _. |
# \_| (/_ |_ | | (_) | | | _> (_| |
# |
conn = sqlite3.connect(self.db_path)
c = conn.cursor()
if elts:
cmd_ele = "AND " + " ".join(cond_sql_or("elt", elts))
else:
cmd_ele = ""
c.execute('''SELECT DISTINCT data from all_value
WHERE name="{basis_name}" COLLATE NOCASE
{cmd_ele}'''.format(basis_name=basis_name,
cmd_ele=cmd_ele))
l_data_raw = c.fetchall()
conn.close()
# |_| _. ._ _| | _ || | ||
# | | (_| | | (_| | (/_ |_
#
p = re.compile(ur'^(\w)\s+\d+\b')
l_data = []
for data_raw in l_data_raw:
basis = data_raw[0].strip()
l_line_raw = basis.split("\n")
l_line = [l_line_raw[0]]
for symmetry, begin, end in get_list_type(l_line_raw):
if not(with_l) and symmetry in "L":
body_s = []
body_p = []
for i_l in l_line_raw[begin + 1:end]:
a = i_l.split()
common = "{:>3}".format(a[0])
common += "{:>15.7f}".format(float(a[1]))
tail_s = common + "{:>23.7f}".format(float(a[2]))
body_s.append(tail_s)
tail_p = common + "{:>23.7f}".format(float(a[3]))
body_p.append(tail_p)
l_line += [l_line_raw[begin].replace("L", "S")]
l_line += body_s
l_line += [l_line_raw[begin].replace("L", "P")]
l_line += body_p
else:
l_line += l_line_raw[begin:end]
l_data.append("\n".join(l_line))
return l_data
format_dict = \
{
"g94": "Gaussian94",
"gamess-us": "GAMESS-US",
"gamess-uk": "GAMESS-UK",
"turbomole": "Turbomole",
"tx93": "TX93",
"molpro": "Molpro",
"molproint": "MolproInt",
"hondo": "Hondo",
"supermolecule": "SuperMolecule",
"molcas": "Molcas",
"hyperchem": "HyperChem",
"dalton": "Dalton",
"demon-ks": "deMon-KS",
"demon2k": "deMon2k",
"aces2": "AcesII",
}
if __name__ == "__main__":
e = EMSL_local(db_path="EMSL.db")
l = e.get_list_basis_available()
for i in l:
print i
l = e.get_list_element_available("pc-0")
print l
l = e.get_basis("cc-pVTZ", ["H", "He"])
for i in l:
print i

0
src/misc/__init__.py Normal file
View File

View File

@ -59,12 +59,20 @@ class Pattern(object):
either = [list(child.children) for child in transform(self).children]
for case in either:
for e in [child for child in case if case.count(child) > 1]:
if type(e) is Argument or type(e) is Option and e.argcount:
if isinstance(
e,
Argument) or isinstance(
e,
Option) and e.argcount:
if e.value is None:
e.value = []
elif type(e.value) is not list:
elif not isinstance(e.value, list):
e.value = e.value.split()
if type(e) is Command or type(e) is Option and e.argcount == 0:
if isinstance(
e,
Command) or isinstance(
e,
Option) and e.argcount == 0:
e.value = 0
return self
@ -84,10 +92,10 @@ def transform(pattern):
if any(t in map(type, children) for t in parents):
child = [c for c in children if type(c) in parents][0]
children.remove(child)
if type(child) is Either:
if isinstance(child, Either):
for c in child.children:
groups.append([c] + children)
elif type(child) is OneOrMore:
elif isinstance(child, OneOrMore):
groups.append(child.children * 2 + children)
else:
groups.append(child.children + children)
@ -117,10 +125,10 @@ class LeafPattern(Pattern):
left_ = left[:pos] + left[pos + 1:]
same_name = [a for a in collected if a.name == self.name]
if type(self.value) in (int, list):
if type(self.value) is int:
if isinstance(self.value, int):
increment = 1
else:
increment = ([match.value] if type(match.value) is str
increment = ([match.value] if isinstance(match.value, str)
else match.value)
if not same_name:
match.value = increment
@ -151,7 +159,7 @@ class Argument(LeafPattern):
def single_match(self, left):
for n, pattern in enumerate(left):
if type(pattern) is Argument:
if isinstance(pattern, Argument):
return n, Argument(self.name, pattern.value)
return None, None
@ -169,7 +177,7 @@ class Command(Argument):
def single_match(self, left):
for n, pattern in enumerate(left):
if type(pattern) is Argument:
if isinstance(pattern, Argument):
if pattern.value == self.name:
return n, Command(self.name, True)
else:

0
src/parser/__init__.py Normal file
View File

View File

@ -0,0 +1,52 @@
# _
# / |_ _ _ | _. | o _| o _|_
# \_ | | (/_ (_ |< \/ (_| | | (_| | |_ \/
# /
# Run these checks after the special L (SP) shells have been handled.
import sys
def check_gamess(str_type):
    """Check whether the orbital type `str_type` is handled by GAMESS.

    `str_type` must be a single character; anything longer (for instance
    the combined "SP" label) trips the assertion below.  Every accepted
    single-character type yields True.
    """
    assert len(str_type) == 1

    # Only reachable when assertions are disabled (python -O): a combined
    # SP shell is then rejected explicitly.
    if str_type == "SP":
        raise BaseException
    return True
def check_NWChem(str_type):
    """Check whether the orbital type `str_type` is handled by NWChem.

    `str_type` must be a single character (enforced by an assertion).
    "S", "P" and "D" are always accepted.  Any other type that sorts after
    "I", or that is one of "K", "L", "M", raises BaseException; every
    remaining type is accepted.  Return True when accepted.
    """
    assert len(str_type) == 1

    always_accepted = ("S", "P", "D")
    explicitly_refused = ("K", "L", "M")
    if str_type not in always_accepted:
        if str_type > "I" or str_type in explicitly_refused:
            raise BaseException
    return True
# Orbital-type checker registered for each supported program name.
d_check = {
    "GAMESS-US": check_gamess,
    "NWChem": check_NWChem,
}


def get_check_function(name_program):
    """Return the orbital-type checker registered for `name_program`.

    When no checker is registered under that name, a message listing the
    known program names is written to standard error and the process
    exits with status 1.
    """
    if name_program in d_check:
        return d_check[name_program]

    str_ = "You need to add a check funtion for your program {0}"
    sys.stderr.write(str_.format(name_program) + "\n")
    sys.stderr.write("This one are avalaible {0}".format(d_check.keys()) + "\n")
    sys.exit(1)

138
src/parser/gamess_us.py Normal file
View File

@ -0,0 +1,138 @@
# __
# /__ _. ._ _ _ _ _ _
# \_| (_| | | | (/_ _> _> |_| _>
#
from src.parser_handler import get_dict_ele
import re
def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
    """Parse the raw GAMESS-US basis set text into per-element blocks.

    The text found between the ``$DATA`` and ``$END`` markers is split on
    blank lines and the resulting blocks are paired, in order, with `elts`.
    For every pair, the first word of the block is compared
    (case-insensitively) with the value `get_dict_ele()` stores for the
    lower-cased entry of `elts`; a mismatch is an error.

    data  -- raw text containing the ``$DATA ... $END`` section
    name  -- basis set name, returned unchanged
    des   -- basis set description, returned unchanged
    elts  -- element identifiers, in the order their blocks appear in `data`
    debug -- when true, print diagnostic details before raising

    Return (name, des, [[elt, data_elt], ...]).

    Raise Exception("WARNING not DATA") when the ``$DATA`` marker is missing
    or immediately followed by ``$END``, Exception("WARNING bad split") when
    a block still contains a "$", and Exception("WARNING not a good ELEMENT")
    when a block does not start with the expected element name.
    """
    if data.find("$DATA") == -1 or data.find("$DATA$END") != -1:
        if debug:
            print(data)
        raise Exception("WARNING not DATA")

    # Normalise the text BEFORE locating the markers: replacing
    # "PHOSPHOROUS" by "PHOSPHORUS" shortens the string, so offsets taken
    # on the un-normalised text would point past the real "$END".
    # "D+"/"D-" turn Fortran-style exponents into "E+"/"E-".
    for old, new in (("PHOSPHOROUS", "PHOSPHORUS"),
                     ("D+", "E+"),
                     ("D-", "E-")):
        data = data.replace(old, new)

    b = data.find("$DATA")
    e = data.find("$END")

    # Skip the 5 characters of "$DATA" and the character right before
    # "$END", then cut the section into blank-line separated blocks.
    blocks = data[b + 5:e - 1].split('\n\n')

    dict_ele = get_dict_ele()

    basis_data = []
    # NOTE: zip() stops at the shorter sequence, so surplus elements or
    # surplus blocks are silently ignored.
    for elt, data_elt in zip(elts, blocks):
        elt_long_th = dict_ele[elt.lower()]
        elt_long_exp = data_elt.split()[0].lower()

        # A leftover "$" means the blank-line split swallowed a marker.
        if "$" in data_elt:
            if debug:
                print("Error")
            raise Exception("WARNING bad split")

        if elt_long_th != elt_long_exp:
            if debug:
                print("th {0}".format(elt_long_th))
                print("exp {0}".format(elt_long_exp))
                print("abv {0}".format(elt))
            raise Exception("WARNING not a good ELEMENT")

        basis_data.append([elt, data_elt.strip()])

    return (name, des, basis_data)
# A shell header line: one word character (the orbital type), whitespace,
# then an integer.
symmetry_regex = re.compile(r'^(\w)\s+\d+\b')


def l_symmetry_gamess_us(atom_basis):
    """
    Return the begin and the end of every orbital shell of one atom.

    input: atom_basis = [name, S 1, 12 0.12 12212, ...]  (list of lines)
    output: [ [type, begin, end], ...]

    `begin` is the index of the shell header line and `end` the index of
    the next header (or len(atom_basis) for the last shell).  A header
    reading "L" whose following line has exactly 4 columns is reported as
    the combined "SP" type; any other "L" is kept as "L".

    An input without any shell header (including an empty list) yields [].
    """
    # Example
    # [['S', 1, 5], ['SP', 5, 9], ['SP', 9, 12], ['D', 16, 18]]
    l = []
    n_lines = len(atom_basis)

    for i, line in enumerate(atom_basis):
        # Optimisation: header lines are short, skip the long data lines
        # without running the regex.
        if len(line) >= 10:
            continue

        m = symmetry_regex.search(line)
        if not m:
            continue

        read_symmetry = m.group(1)

        # "L" is either a real L shell or the special SP shell; an SP shell
        # carries two coefficient columns (4 columns in total).  The bound
        # check keeps a header sitting on the very last line from indexing
        # past the end of the list.
        is_sp = (read_symmetry == "L"
                 and i + 1 < n_lines
                 and len(atom_basis[i + 1].split()) == 4)
        real_symmetry = "SP" if is_sp else read_symmetry

        # The previous shell ends where this one begins.
        if l:
            l[-1].append(i)
        l.append([real_symmetry, i])

    # The last shell runs until the end of the data.
    if l:
        l[-1].append(n_lines)

    return l
def handle_l_gamess_us(l_atom_basis):
    """
    Rewrite every combined SP shell of each atom basis as an S shell
    followed by a P shell.

    `l_atom_basis` is a list of newline-separated atom basis texts whose
    first line holds the atom name.  Shells reported as "SP" by
    l_symmetry_gamess_us are split: the header is emitted twice (its "L"
    replaced by "S", then by "P") and each data line is re-formatted with
    the index, the exponent and the matching coefficient column.  All other
    shells are copied unchanged.  Return the list of rewritten texts.
    """
    converted = []

    for atom_basis in l_atom_basis:
        raw_lines = atom_basis.split("\n")

        # raw_lines[0] contains the name of the atom
        out_lines = raw_lines[:1]

        for symmetry, begin, end in l_symmetry_gamess_us(raw_lines):
            if symmetry != "SP":
                out_lines.extend(raw_lines[begin:end])
                continue

            # One SP shell => one S shell and one P shell sharing the
            # same exponents.
            s_rows = []
            p_rows = []
            for row in raw_lines[begin + 1:end]:
                fields = row.split()
                prefix = "{:>3}".format(fields[0])
                prefix += "{:>15.7f}".format(float(fields[1]))
                s_rows.append(prefix + "{:>23.7f}".format(float(fields[2])))
                p_rows.append(prefix + "{:>23.7f}".format(float(fields[3])))

            header = raw_lines[begin]
            out_lines.append(header.replace("L", "S"))
            out_lines.extend(s_rows)
            out_lines.append(header.replace("L", "P"))
            out_lines.extend(p_rows)

        converted.append("\n".join(out_lines))

    return converted

83
src/parser/gaussian94.py Normal file
View File

@ -0,0 +1,83 @@
# __ _
# /__ _. _ _ o _. ._ (_| |_|_
# \_| (_| |_| _> _> | (_| | | | |
#
import sys
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
    """Parse the Gaussian94 basis data raw html to get a nice tuple.

    The data-pairs item is actually expected to be a 2 item list:
    [symbol, data]

    e.g. ["Ca", "#BASIS SET..."]

    N.B.: Currently ignores ECP data!

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: when true, print the raw data before failing and report
                  symbols that were not expected (any more) on stderr
    @type debug : bool
    @return: (name, description, data-pairs)
    @rtype : tuple
    @raise ValueError: when `data` contains no "****" marker
    """
    # Each basis set block starts and ends with ****. Find the region
    # containing all the basis blocks using the first and last ****.
    mark = "****"
    begin = data.find(mark)
    end = data.rfind(mark)

    if begin == -1 or end == -1:
        if debug:
            print(data)
        str_ = " No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # `end` already points at the first character of the closing marker, so
    # the region stops exactly there.  (Subtracting len(mark) once more
    # would cut off the last characters of the final block.)
    trimmed = data[begin + len(mark):end].strip()
    chunks = []
    lines = []

    # group lines of data delimited by mark into per-element chunks
    for line in trimmed.split("\n"):
        if line.startswith(mark):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another basis set block
    if lines and (not chunks or lines != chunks[-1]):
        chunks.append(lines)

    # join lines back into solid text blocks, removing the marker lines
    # from the chunks themselves
    chunks = ["\n".join([L for L in c if mark not in L]) for c in chunks]

    # check each block for element and assign symbols to final pairs
    pairs = []
    unused_elements = set([e.upper() for e in elements])
    for chunk in chunks:
        # get first 3 chars of first line in block
        symbol = chunk.split("\n")[0][:3].strip()
        try:
            unused_elements.remove(symbol.upper())
        except KeyError:
            # symbol not (or no longer) in the expected set
            if debug:
                msg = "Warning: already processed {0}\n".format(symbol)
                sys.stderr.write(msg)
        pairs.append([symbol, chunk])

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}".format(
            list(unused_elements),
            name)
        print(msg)

    return (name, description, pairs)

228
src/parser/nwchem.py Normal file
View File

@ -0,0 +1,228 @@
# _
# |\ | / |_ _ ._ _
# | \| \/\/ \_ | | (/_ | | |
#
import json
def extract_basis_nwchem(data, name):
    """Extract atomic orbital, charge density fitting, or exchange
    correlation functional basis data from a text region passed in as
    data. The charge density fitting and exchange correlation functional
    basis set data are employed for density functional calculations.

    The region starts after the ``BASIS "<name>" PRINT`` marker and stops
    at the next ``END`` (or at the end of `data` when there is none); it is
    then cut into one chunk per ``#BASIS SET`` line.

    @param data: text region containing basis set data
    @type data : str
    @param name: name of basis type: "ao basis", "cd basis", or "xc basis"
    @type name : str
    @return: per-element basis set chunks ([] when the marker is absent)
    @rtype : list
    """
    begin_marker = """BASIS "{0}" PRINT""".format(name)
    end_marker = "END"

    # search for the basis set data begin marker
    # comparing on the upper-cased text because original data has
    # inconsistent capitalization
    upper_data = data.upper()
    begin = upper_data.find(begin_marker.upper())

    # No basis data found
    if begin == -1:
        return []

    end = upper_data.find(end_marker, begin)
    if end == -1:
        # Unterminated section: keep everything up to the end of the text.
        end = len(data)

    # `end` is the index of the first character of the end marker, so the
    # region stops exactly there; subtracting the marker length again would
    # chop the tail of the last chunk.
    trimmed = data[begin + len(begin_marker):end].strip()

    chunks = []
    lines = []

    # group lines of data delimited by #BASIS SET... into per-element chunks
    for line in trimmed.split("\n"):
        if line.upper().startswith("#BASIS SET"):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another #BASIS SET...
    if lines and (not chunks or lines != chunks[-1]):
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
def extract_ecp_nwchem(data):
    """Extract the effective core potential basis data from a text region
    passed in as data.

    The region starts after an ``ECP`` line and stops at the next ``END``;
    it is then cut into one chunk per ``XX nelec YY`` line.

    @param data: text region containing ECP data
    @type data : str
    @return: per-element effective core potential chunks ([] when either
             marker is missing)
    @rtype : list
    """
    ecp_begin_mark = "ECP\n"
    ecp_end_mark = "END"

    # markers are looked up on the upper-cased text
    upper_data = data.upper()
    ecp_begin = upper_data.find(ecp_begin_mark)
    ecp_end = upper_data.find(ecp_end_mark, ecp_begin)

    # No ECP data, so return empty list
    if ecp_begin == -1 or ecp_end == -1:
        return []

    # `ecp_end` is the index of the first character of the end marker, so
    # the region stops exactly there; subtracting the marker length again
    # would chop the tail of the last chunk.
    ecp_region = data[ecp_begin + len(ecp_begin_mark):ecp_end].strip()

    chunks = []
    lines = []

    # group lines of data delimited by XX nelec YY into chunks, e.g.
    # "Zn nelec 18" begins a zinc ECP
    for line in ecp_region.split("\n"):
        if " nelec " in line.lower():
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another XX nelec YY..
    if lines and (not chunks or lines != chunks[-1]):
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
def unpack_nwchem_basis_block(data):
    """Deserialize a NWChem basis data block from its JSON text.

    @param data: a JSON of basis set data, perhaps containing many types
    @type data : str
    @return: unpacked data
    @rtype : dict
    """
    return json.loads(data)
def parse_basis_data_nwchem(data, name, description, elements, debug=True):
    """Parse the NWChem basis data raw html into [name, description, pairs].

    Each item of pairs is a 2 item list [symbol, serialized], where
    serialized is the JSON text of a dict that maps the section names found
    for that symbol ("ao basis", "cd basis", "xc basis", "ecp") to the
    corresponding chunk of text.  Pairs are ordered by first appearance of
    the symbol.

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: accepted for signature compatibility; not read here
    @type debug : bool
    @return: [name, description, data-pairs]
    @rtype : list
    @raise ValueError: when no section holds any data, or when a chunk has
                       no line from which a symbol can be read
    """

    def extract_symbol(txt):
        # The symbol is the first word found within the first 3 characters
        # of the first line that is neither a comment nor blank there.
        for sline in txt.split("\n"):
            if sline.startswith("#"):
                continue
            words = sline[:3].split()
            if words:
                return words[0]
        raise ValueError("Can't find element symbol in {0}".format(txt))

    ao_chunks = extract_basis_nwchem(data, "ao basis")
    cd_chunks = extract_basis_nwchem(data, "cd basis")
    xc_chunks = extract_basis_nwchem(data, "xc basis")
    ecp_chunks = extract_ecp_nwchem(data)

    if not (ao_chunks or cd_chunks or xc_chunks or ecp_chunks):
        str_ = "No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # Tag all used elements, whether from ordinary AO basis or ECP section
    unused_elements = set([e.upper() for e in elements])
    for chunk in ao_chunks + cd_chunks + xc_chunks + ecp_chunks:
        unused_elements.discard(extract_symbol(chunk).upper())

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}"
        print(msg.format(list(unused_elements), name))

    # Form packed chunks: symbol -> (rank of first appearance, sections)
    packed = {}
    sections = ((ao_chunks, "ao basis"), (cd_chunks, "cd basis"),
                (xc_chunks, "xc basis"), (ecp_chunks, "ecp"))
    for cgroup, gname in sections:
        for chunk in cgroup:
            symbol = extract_symbol(chunk)
            if symbol in packed:
                # Expand entry, e.g. add ecp data for Na after it has
                # ao basis
                idx, known_sections = packed[symbol]
                known_sections[gname] = chunk
                chunk_dict = known_sections.copy()
            else:
                # Create fresh entry, e.g. add Na with initial ao basis;
                # one entry per symbol, so the current size is its rank
                idx = len(packed)
                chunk_dict = {gname: chunk}
            packed[symbol] = (idx, chunk_dict)

    # Assign (Symbol, Serialized) to final pairs, in order of appearance
    pairs = []
    for _, chunk in sorted(packed.values(), key=lambda entry: entry[0]):
        first_section = (chunk.get("ao basis")
                         or chunk.get("cd basis")
                         or chunk.get("xc basis")
                         or chunk.get("ecp"))
        pairs.append([extract_symbol(first_section), json.dumps(chunk)])

    return [name, description, pairs]

138
src/parser_handler.py Normal file
View File

@ -0,0 +1,138 @@
import sys
import os
import re
def get_dict_ele():
    """Load the element table stored in src/misc/elts_abrev.dat.

    The file is looked up relative to the directory of the running script
    (sys.argv[0]).  Each non-blank line is split on "-"; the second field
    becomes the key and the third field the value, both stripped and
    lower-cased.

    NOTE(review): the key appears to be the element abbreviation and the
    value its full name (that is how callers index the result) -- confirm
    against the data file.

    Return the resulting dict.
    """
    # abspath() first: when the script is started without a directory part
    # (e.g. "python EMSL_api.py"), dirname() alone is "" and plain string
    # concatenation would point at "/src/misc/..." in the filesystem root.
    script_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
    elt_path = os.path.join(script_dir, "src", "misc", "elts_abrev.dat")

    dict_ele = dict()
    with open(elt_path, "r") as f:
        for line in f:
            # tolerate blank lines (e.g. a trailing newline)
            if not line.strip():
                continue
            fields = line.split("-")
            dict_ele[fields[1].strip().lower()] = fields[2].strip().lower()

    return dict_ele
# ______ _ _ _ _
# | ___| | | | (_) | |
# | |_ _ __ ___ _ __ ___ __ _| |_ __| |_ ___| |_
# | _| '__/ _ \| '_ ` _ \ / _` | __| / _` | |/ __| __|
# | | | | | (_) | | | | | | (_| | |_ | (_| | | (__| |_
# \_| |_| \___/|_| |_| |_|\__,_|\__| \__,_|_|\___|\__|
#
from src.parser.gamess_us import parse_basis_data_gamess_us
from src.parser.gaussian94 import parse_basis_data_gaussian94
from src.parser.nwchem import parse_basis_data_nwchem
# Parser function registered for each format name; None marks a format
# for which no parser is registered.
parser_dict = {"Gaussian94": parse_basis_data_gaussian94,
"GAMESS-US": parse_basis_data_gamess_us,
"NWChem": parse_basis_data_nwchem,
"GAMESS-UK": None,
"Turbomole": None,
"TX93": None,
"Molpro": None,
"MolproInt": None,
"Hondo": None,
"SuperMolecule": None,
"Molcas": None,
"HyperChem": None,
"Dalton": None,
"deMon-KS": None,
"deMon2k": None,
"AcesII": None}
def check_format(format):
    """Return `format` unchanged when it is a key of parser_dict.

    Otherwise write a message listing the known format names to standard
    error and exit the process with status 1.
    """
    if format in parser_dict:
        return format

    str_ = ["This format ({0}) is not available in EMSL".format(format),
            "EMSL provide this list : {0}".format(parser_dict.keys())]
    sys.stderr.write("\n".join(str_) + "\n")
    sys.exit(1)
def get_parser_function(format):
    """Return the parser function registered for `format` in parser_dict.

    When the format is known but has no parser (its entry is None), a help
    message listing the formats that do have one is written to standard
    error and the process exits with status 1.

    Raise KeyError when `format` is not a key of parser_dict.
    """
    parser = parser_dict[format]
    if parser:
        return parser

    list_parser = [k for k, v in parser_dict.items() if v]
    # The example below names the module the GAMESS-US parser is actually
    # imported from (src.parser.gamess_us).
    str_ = ["We have no parser for this format {0}".format(format),
            "We only support {0}".format(list_parser),
            "Feel free to fork and send a pull request",
            "You just need to add a function like this one:",
            "'src.parser.gamess_us.parse_basis_data_gamess_us'"]
    sys.stderr.write("\n".join(str_) + "\n")
    sys.exit(1)
# _____ _ _ _ _
# / ___| | | | (_) | |
# \ `--. _ _ _ __ ___ _ __ ___ ___| |_ _ __ _ _ __| |_ ___| |_
# `--. \ | | | '_ ` _ \| '_ ` _ \ / _ \ __| '__| | | | / _` | |/ __| __|
# /\__/ / |_| | | | | | | | | | | | __/ |_| | | |_| | | (_| | | (__| |_
# \____/ \__, |_| |_| |_|_| |_| |_|\___|\__|_| \__, | \__,_|_|\___|\__|
# __/ | __/ |
# |___/ |___/
"""
Return the begin and the end of all the type of orbital
input: atom_basis = [name, S 1, 12 0.12 12212, ...]
output: [ [type, begin, end], ...]
"""
from src.parser.gamess_us import l_symmetry_gamess_us
# Shell-boundary function registered for each supported format name.
symmetry_dict = {
    "GAMESS-US": l_symmetry_gamess_us,
}


def get_symmetry_function(format):
    """
    Return the function registered for `format` in symmetry_dict.

    The registered function has this contract:
    input: atom_basis = [name, S 1, 12 0.12 12212, ...]
    output: [ [type, begin, end], ...]

    When nothing is registered for `format`, a message is written to
    standard error and the process exits with status 1.
    """
    if format not in symmetry_dict:
        sys.stderr.write("You need to add a function in symmetry_dict" + "\n")
        sys.stderr.write("for your format ({0})".format(format) + "\n")
        sys.exit(1)

    return symmetry_dict[format]
# _ _ _ _ _ _ _ _ _ ______ _ _
# | | | | | | | ( | ) | ( | ) | _ (_) | |
# | |_| | __ _ _ __ __| | | ___ V V| | V V | | | |_ ___| |_
# | _ |/ _` | '_ \ / _` | |/ _ \ | | | | | | |/ __| __|
# | | | | (_| | | | | (_| | | __/ | |____ | |/ /| | (__| |_
# \_| |_/\__,_|_| |_|\__,_|_|\___| \_____/ |___/ |_|\___|\__|
"""
Transform the special SP function (found using get_symmetry_function) into S and P
"""
from src.parser.gamess_us import handle_l_gamess_us
# SP-splitting function registered for each supported format name.
handle_l_dict = {
    "GAMESS-US": handle_l_gamess_us,
}


def get_handle_l_function(format):
    """
    Return the function registered for `format` in handle_l_dict, i.e. the
    one that turns the special SP shells into separate S and P shells.

    When nothing is registered for `format`, a message is written to
    standard error and the process exits with status 1.
    """
    if format not in handle_l_dict:
        sys.stderr.write("You need to add a function in handle_l_dict" + "\n")
        sys.stderr.write("for your format ({0})".format(format) + "\n")
        sys.exit(1)

    return handle_l_dict[format]