Merge pull request #3 from TApplencourt/master

Major change! Read description
2024-12-22 12:23:39 +01:00 · 2015-03-18 17:54:44 +01:00 · 2015-03-18 17:54:44 +01:00 · 839391ffe7
commit 839391ffe7
parent addfdbdf31 85590ee477
15 changed files with 1436 additions and 524 deletions
--- a/EMSL_api.py
+++ b/EMSL_api.py
@ -4,19 +4,22 @@
 """EMSL Api.

 Usage:
-  EMSL_api.py list_basis        [--atom=atom_name...]
-                                [--db_path=db_path]
-  EMSL_api.py list_atoms  --basis=basis_name
-                                [--db_path=db_path]
-  EMSL_api.py get_basis_data --basis=basis_name
-                                [--atom=atom_name...]
-                                [--db_path=db_path]
-                                [--with_l]
-                                [(--save [--path=path])]
+  EMSL_api.py list_basis [--basis=<basis_name>...]
+                         [--atom=<atom_name>...]
+                         [--db_path=<db_path>]
+                         [--average_mo_number]
+  EMSL_api.py list_atoms --basis=<basis_name>
+                         [--db_path=<db_path>]
+  EMSL_api.py get_basis_data --basis=<basis_name>
+                             [--atom=<atom_name>...]
+                             [--db_path=<db_path>]
+                             [(--save [--path=<path>])]
+                             [--check=<program_name>]
+                             [--treat_l]
  EMSL_api.py list_formats
-  EMSL_api.py create_db      --db_path=db_path
-                             --format=format
-                             [--no-contraction]
+  EMSL_api.py create_db --format=<format>
+                        [--db_path=<db_path>]
+                        [--no-contraction]
  EMSL_api.py (-h | --help)
  EMSL_api.py --version

@ -27,75 +30,100 @@ Options:

 <db_path> is the path to the SQLite3 file containing the Basis sets.
 By default is $EMSL_API_ROOT/db/Gausian_uk.db
+
+Example of use:
+    ./EMSL_api.py list_basis --atom Al --atom U
+    ./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
+    ./EMSL_api.py list_atoms --basis ANO-RCC
+    ./EMSL_api.py get_basis_data --basis 3-21++G*
 """

-version = "0.2.0"
+version = "0.8.1"

-import sys
+import os

-from src.docopt import docopt
-from src.EMSL_utility import EMSL_dump
-from src.EMSL_utility import format_dict
-from src.EMSL_utility import EMSL_local
+from src.misc.docopt import docopt
+from src.EMSL_dump import EMSL_dump
+from src.EMSL_local import EMSL_local, checkSQLite3

 if __name__ == '__main__':

    arguments = docopt(__doc__, version='EMSL Api ' + version)

+    # ___
+    #  |  ._  o _|_
+    # _|_ | | |  |_
+    #
+
    if arguments["--db_path"]:
        db_path = arguments["--db_path"]
    else:
-        import os
-        db_path = os.path.dirname(__file__) + "/db/Gamess-us.db"
+        db_path = os.path.dirname(__file__) + "/db/GAMESS-US.db"
+
+    # Check the db
+    try:
+        if not(arguments['create_db']):
+            db_path, db_path_changed = checkSQLite3(db_path)
+    except:
+        raise
+
+    #  _     _     _    ______           _
+    # | |   (_)   | |   | ___ \         (_)
+    # | |    _ ___| |_  | |_/ / __ _ ___ _ ___
+    # | |   | / __| __| | ___ \/ _` / __| / __|
+    # | |___| \__ \ |_  | |_/ / (_| \__ \ \__ \
+    # \_____/_|___/\__| \____/ \__,_|___/_|___/

-    # _     _     _    ______           _
-    #| |   (_)   | |   | ___ \         (_)
-    #| |    _ ___| |_  | |_/ / __ _ ___ _ ___
-    #| |   | / __| __| | ___ \/ _` / __| / __|
-    #| |___| \__ \ |_  | |_/ / (_| \__ \ \__ \
-    #\_____/_|___/\__| \____/ \__,_|___/_|___/
-    #
    if arguments["list_basis"]:
        e = EMSL_local(db_path=db_path)

-        elts = arguments["--atom"]
-        l = e.get_list_basis_available(elts)
+        l = e.list_basis_available(arguments["--atom"],
+                                   arguments["--basis"],
+                                   arguments["--average_mo_number"])

-        for name, des in l:
-            print name, "|", des
+        if arguments["--average_mo_number"]:
+            for name, des, avg in l:
+                print "- '{}' ({}) || {:<50}".format(name, avg, des)
+        else:
+            for name, des in l:
+                print "- '{}' || {:<50}".format(name, des)

-    # _     _     _     _____ _                           _
-    #| |   (_)   | |   |  ___| |                         | |
-    #| |    _ ___| |_  | |__ | | ___ _ __ ___   ___ _ __ | |_ ___
-    #| |   | / __| __| |  __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
-    #| |___| \__ \ |_  | |___| |  __/ | | | | |  __/ | | | |_\__ \
-    #\_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
-    if arguments["list_atoms"]:
+    #  _     _     _     _____ _                           _
+    # | |   (_)   | |   |  ___| |                         | |
+    # | |    _ ___| |_  | |__ | | ___ _ __ ___   ___ _ __ | |_ ___
+    # | |   | / __| __| |  __|| |/ _ \ '_ ` _ \ / _ \ '_ \| __/ __|
+    # | |___| \__ \ |_  | |___| |  __/ | | | | |  __/ | | | |_\__ \
+    # \_____/_|___/\__| \____/|_|\___|_| |_| |_|\___|_| |_|\__|___/
+    elif arguments["list_atoms"]:
        e = EMSL_local(db_path=db_path)

        basis_name = arguments["--basis"]
        l = e.get_list_element_available(basis_name)
        print ", ".join(l)

-    #______           _           _       _
-    #| ___ \         (_)         | |     | |
-    #| |_/ / __ _ ___ _ ___    __| | __ _| |_ __ _
-    #| ___ \/ _` / __| / __|  / _` |/ _` | __/ _` |
-    #| |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
-    #\____/ \__,_|___/_|___/  \__,_|\__,_|\__\__,_|
-    if arguments["get_basis_data"]:
+    # ______           _           _       _
+    # | ___ \         (_)         | |     | |
+    # | |_/ / __ _ ___ _ ___    __| | __ _| |_ __ _
+    # | ___ \/ _` / __| / __|  / _` |/ _` | __/ _` |
+    # | |_/ / (_| \__ \ \__ \ | (_| | (_| | || (_| |
+    # \____/ \__,_|___/_|___/  \__,_|\__,_|\__\__,_|
+    elif arguments["get_basis_data"]:
        e = EMSL_local(db_path=db_path)
-        basis_name = arguments["--basis"]
+        basis_name = arguments["--basis"][0]
        elts = arguments["--atom"]

-        l = e.get_basis(basis_name, elts,arguments["--with_l"])
-        str_ = "\n\n".join(l) + "\n"
+        l_atom_basis = e.get_basis(basis_name, elts,
+                                   arguments["--treat_l"],
+                                   arguments["--check"])
+        # Add separation between atoms, and a empty last line
+        str_ = "\n\n".join(l_atom_basis) + "\n"

        if arguments["--save"]:

            if arguments["--path"]:
                path = arguments["--path"]
            else:
+                # The default path is built from the basis name and the atoms
                path = "_".join([basis_name, ".".join(elts)])
                path = "/tmp/" + path + ".bs"

@ -105,32 +133,39 @@ if __name__ == '__main__':
        else:
            print str_

-    # _     _     _      __                           _
-    #| |   (_)   | |    / _|                         | |
-    #| |    _ ___| |_  | |_ ___  _ __ _ __ ___   __ _| |_ ___
-    #| |   | / __| __| |  _/ _ \| '__| '_ ` _ \ / _` | __/ __|
-    #| |___| \__ \ |_  | || (_) | |  | | | | | | (_| | |_\__ \
-    #\_____/_|___/\__| |_| \___/|_|  |_| |_| |_|\__,_|\__|___/
-    if arguments["list_formats"]:
-        for i in format_dict:
+    #  _     _     _      __                           _
+    # | |   (_)   | |    / _|                         | |
+    # | |    _ ___| |_  | |_ ___  _ __ _ __ ___   __ _| |_ ___
+    # | |   | / __| __| |  _/ _ \| '__| '_ ` _ \ / _` | __/ __|
+    # | |___| \__ \ |_  | || (_) | |  | | | | | | (_| | |_\__ \
+    # \_____/_|___/\__| |_| \___/|_|  |_| |_| |_|\__,_|\__|___/
+    elif arguments["list_formats"]:
+        e = EMSL_dump()
+        for i in e.get_list_format():
            print i

-    # _____                _             _ _
-    #/  __ \              | |           | | |
-    #| /  \/_ __ ___  __ _| |_ ___    __| | |__
-    #| |   | '__/ _ \/ _` | __/ _ \  / _` | '_ \
-    #| \__/\ | |  __/ (_| | ||  __/ | (_| | |_) |
-    # \____/_|  \___|\__,_|\__\___|  \__,_|_.__/
-    if arguments["create_db"]:
+    #  _____                _             _ _
+    # /  __ \              | |           | | |
+    # | /  \/_ __ ___  __ _| |_ ___    __| | |__
+    # | |   | '__/ _ \/ _` | __/ _ \  / _` | '_ \
+    # | \__/\ | |  __/ (_| | ||  __/ | (_| | |_) |
+    #  \____/_|  \___|\__,_|\__\___|  \__,_|_.__/
+    elif arguments["create_db"]:
        db_path = arguments["--db_path"]
        format = arguments["--format"]
-        if format not in format_dict:
-            print "Format %s doesn't exist. Run list_formats to get the list of formats." % (format)
-            sys.exit(1)
+
        contraction = not arguments["--no-contraction"]

-        e = EMSL_dump(
-            db_path=db_path,
-            format=format_dict[format],
-            contraction=contraction)
+        e = EMSL_dump(db_path=db_path,
+                      format=format,
+                      contraction=contraction)
        e.new_db()
+
+    #  _
+    # /  |  _   _. ._  o ._   _
+    # \_ | (/_ (_| | | | | | (_|
+    #                         _|
+
+    # Clean up on exit
+    if not(arguments['create_db']) and db_path_changed:
+        os.system("rm -f /dev/shm/%d.db" % (os.getpid()))
--- a/README.md
+++ b/README.md
@ -4,28 +4,34 @@ EMSL_Basis_Set_Exchange_Local

 Create of Local Copy of the famous [EMSL Basis Set Exchange](https://bse.pnl.gov/bse/portal) and use it easily with the API.

-* Make a slight copy (40Mo Sqlite3 database) of the EMSL Basis Set Exchange website (One database for all the basis set of one format);
+* Make a slight copy (40Mo Sqlite3 database) of the EMSL Basis Set Exchange website. Currently available formats are:
+ *  Gamess-us, Gaussian94 and NEWCHEM;
 * API for scripting;  
 * Quick local access without delay;
-* Only need [Python](https://www.python.org/) and [Request](http://docs.python-requests.org/en/latest/) module.
+* Only need [Python](https://www.python.org/)

 ##Dependencies
 * Python >2.6
-* Request ```pip install requests``` (in a virtual env or with sudo)
+
+###### Optional
+If you plan to download a database yourself, instead of using the pre-existing one, you need:
+* [Request](http://docs.python-requests.org/en/latest/) python module. ```$pip install requests``` (do it in a virtual env or with sudo)

 ##Installation
-* Download the git (```$ git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
-* Done ! You can now, use ```EMSL_api.py```
+* Download the git repository (```$git clone https://github.com/TApplencourt/EMSL_Basis_Set_Exchange_Local.git``` for example)
+* That's all! You can now use ```EMSL_api.py```

 ##Usage
 ```
 EMSL Api.

 Usage:
-  EMSL_api.py list_basis        [--atom=<atom_name>...]
-                                [--db_path=<db_path>]
+  EMSL_api.py list_basis  [--basis=<basis_name>...]
+                          [--atom=<atom_name>...]
+                          [--db_path=<db_path>]
+                          [--average_mo_number]
  EMSL_api.py list_atoms  --basis=<basis_name>
-                                [--db_path=<db_path>]
+                          [--db_path=<db_path>]
  EMSL_api.py get_basis_data --basis=<basis_name>
                                [--atom=<atom_name>...]
                                [--db_path=<db_path>]
@ -45,6 +51,12 @@ Options:

 <db_path> is the path to the SQLite3 file containing the Basis sets.
 By default is $EMSL_API_ROOT/db/Gausian_uk.db
+
+Example of use:
+    ./EMSL_api.py list_basis --atom Al --atom U
+    ./EMSL_api.py list_basis --atom S --basis 'cc-pV*' --average_mo_number
+    ./EMSL_api.py list_atoms --basis ANO-RCC
+    ./EMSL_api.py get_basis_data --basis 3-21++G*
 ```
 ##Demonstration

@ -53,14 +65,47 @@ By default is $EMSL_API_ROOT/db/Gausian_uk.db
 (For a beter quality see the [Source](https://asciinema.org/api/asciicasts/15380))

 ##To do
-For now  we can only parse Gaussian-US basis set type file. (Look at ```./src/EMSL_utility.py#EMSL_dump.basis_data_row_to_array```)
+For now  we can only parse `Gamess-us, Gaussian94 and NEWCHEM` (Thanks to @mattbernst for Gaussian94 and NEWCHEM) basis set type file.
+
+###I need more format!
+
+It's really simple. Just read the short explanation below.
+
+You just need to provide a function that splits the basis data, which contains all the atoms, into per-atom tuples.
+
+Something like this:
+```python
+def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
+    """Parse the Gaussian94 basis data raw html to get a nice tuple.
+
+    The data-pairs item is actually expected to be a 2 item list:
+    [symbol, data]
+
+    e.g. ["Ca", "#BASIS SET..."]
+
+    N.B.: Currently ignores ECP data!
+
+    @param data: raw HTML from BSE
+    @type data : unicode
+    @param name: basis set name
+    @type name : str
+    @param des: basis set description
+    @type des : str
+    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
+    @type elements : list
+    @return: (name, description, data-pairs)
+    @rtype : tuple
+    """
+```
+
+Then just add the function in `src.parser_handler.format_dict`. You are ready to go!

 Feel free to fork/pull request. 

 ##Disclaimer
 It'is not a official API. Use it with moderation.

-In papers where you use the basis sets obtained from the Basis Set Exchange please site this :
+In papers where you use the basis sets obtained from the Basis Set Exchange please cite this:
 >The Role of Databases in Support of Computational Chemistry Calculations
 >
 >>--<cite>Feller, D.; J. Comp. Chem., 17(13), 1571-1586, 1996.</cite>
--- a/db/GAMESS-US.db
+++ b/db/GAMESS-US.db
--- a/src/EMSL_dump.py
+++ b/src/EMSL_dump.py
@ -0,0 +1,303 @@
+import os
+import sys
+import re
+import time
+import sqlite3
+
+from collections import OrderedDict
+
+
+def install_with_pip(name):
+    """Interactively offer to install the package `name` with pip.
+
+    Ask on stdin until the answer is 'y' or 'n' (case-insensitive).
+    On 'y', run ``pip install <name>``; when pip itself cannot be
+    imported, print a hint and exit with status 1.
+    On 'n', return without doing anything.
+    """
+
+    d = {'y': True,
+         'n': False}
+
+    while True:
+        choice = raw_input('Do you want to install it ? [Y/N]')
+        try:
+            ins = d[choice.strip().lower()]
+            break
+        except KeyError:
+            print "not a valid choice"
+
+    if ins:
+        # Only a missing pip is reported as "You need pip"; an error raised
+        # by the installation itself is no longer hidden behind that message.
+        try:
+            import pip
+        except ImportError:
+            print "You need pip"
+            print "(http://pip.readthedocs.org/en/latest/installing.html)"
+            sys.exit(1)
+        pip.main(['install', name])
+
+
+class EMSL_dump:
+
+    """
+    Download the EMSL Basis Set Exchange and save it locally in SQLite3.
+
+    The `requests` module is only needed by the methods that download
+    data; it is resolved lazily so that get_list_format() works without it.
+    """
+
+    def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
+        """Select the output format and the database location.
+
+        db_path     -- path of the SQLite3 file to create; when empty,
+                       <directory of this file>/../db/<format>.db is used
+        format      -- format name, passed through parser_handler.check_format
+        contraction -- stored as str and sent as the 'minimize' request
+                       parameter when downloading
+        """
+
+        from src.parser_handler import get_parser_function
+        from src.parser_handler import check_format
+
+        self.format = check_format(format)
+        self.parser = get_parser_function(self.format)
+
+        if db_path:
+            self.db_path = db_path
+        else:
+            head_path = os.path.dirname(__file__)
+            self.db_path = "{0}/../db/{1}.db".format(head_path, self.format)
+
+        self.contraction = str(contraction)
+        self.debug = False
+
+        # requests stays optional until something is actually downloaded
+        try:
+            import requests
+        except ImportError:
+            requests = None
+        self.requests = requests
+
+    def _get_requests(self):
+        """Return the requests module, offering to install it if missing.
+
+        Raise ImportError when it is still unavailable afterwards.
+        """
+        if self.requests is None:
+            print "You need the requests package"
+            install_with_pip("requests")
+            # Raises ImportError if the user declined or the install failed
+            import requests
+            self.requests = requests
+        return self.requests
+
+    def get_list_format(self):
+        """List all the format available in EMSL"""
+        from src.parser_handler import parser_dict
+        return parser_dict.keys()
+
+    def set_db_path(self, path):
+        """Define the database path"""
+        self.db_path = path
+
+    def dwl_basis_list_raw(self):
+        """Return the source code of the iframe
+           which contains the list of the available basis sets"""
+
+        requests = self._get_requests()
+
+        print "Download all the name available in EMSL."
+        print "It can take some time.",
+        sys.stdout.flush()
+
+        url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
+        if self.debug:
+            # In debug mode the page is cached on disk between runs
+            import cPickle as pickle
+            dbcache = 'db/cache'
+            if not os.path.isfile(dbcache):
+                page = requests.get(url).text
+                with open(dbcache, 'w') as cache:
+                    pickle.dump(page, cache)
+            else:
+                with open(dbcache, 'r') as cache:
+                    page = pickle.load(cache)
+
+        else:
+            page = requests.get(url).text
+
+        print "Done"
+        return page
+
+    def basis_list_raw_to_array(self, data_raw):
+        """Parse the raw html basis set list to create a dict
+           with all the information needed to download the database:
+        Return d[name] = [name, xml_path, description,
+                          list of the elements available]
+
+         Explanation of tuple data from 'tup' by index:
+
+         0 - path to xml file
+         1 - basis set name
+         2 - categorization: "dftcfit", "dftorb", "dftxfit", "diffuse",
+                "ecporb","effective core potential", "orbital", "polarization",
+                "rydberg", or "tight"
+         3 - parameterized elements by symbol e.g. '[H, He, B, C, N, O, F, Ne]'
+         4 - curation status; only 'published' is trustworthy
+         5 - boolean: has ECP
+         6 - boolean: has spin
+         7 - last modified date
+         8 - name of primary developer
+         9 - name of contributor
+        10 - human-readable summary/description of basis set
+        """
+
+        d = OrderedDict()
+
+        for line in data_raw.split('\n'):
+
+            if "new basisSet(" not in line:
+                continue
+
+            b = line.find("(")
+            e = line.find(");")
+
+            s = line[b + 1:e]
+
+            # SECURITY NOTE(review): eval() runs text downloaded from the
+            # network. Kept because the exact argument syntax of the page
+            # cannot be checked from here; consider ast.literal_eval.
+            tup = eval(s)
+
+            xml_path = tup[0]
+
+            # non-published (e.g. rejected) basis sets and ecp should be
+            # ignored
+            if tup[4] != "published" or "-ecp" in xml_path.lower():
+                continue
+
+            name = tup[1]
+            elts = re.sub(r'[["\ \]]', '', tup[3]).split(',')
+            des = re.sub(r'\s+', ' ', tup[-1])
+
+            d[name] = [name, xml_path, des, elts]
+
+        return d
+
+    #  _____                _
+    # /  __ \              | |
+    # | /  \/_ __ ___  __ _| |_ ___
+    # | |   | '__/ _ \/ _` | __/ _ \
+    # | \__/\ | |  __/ (_| | ||  __/
+    #  \____/_|  \___|\__,_|\__\___|
+    #
+    def create_sql(self, dict_basis_list):
+        """Create the sql database from scratch.
+
+        Take the dict of available basis sets (as returned by
+        basis_list_raw_to_array), download each of them with a pool of
+        worker threads and store the parsed data in the database.
+
+        Exit with status 1 when the database file already exists.
+        A basis set that cannot be downloaded/parsed after all the
+        attempts is reported as "Fail" and skipped.
+        """
+
+        if os.path.isfile(self.db_path):
+            print >> sys.stderr, "FAILLURE:"
+            print >> sys.stderr, "{0} file alredy exist.".format(self.db_path),
+            print >> sys.stderr, "Delete or remove it"
+            sys.exit(1)
+
+        requests = self._get_requests()
+
+        conn = sqlite3.connect(self.db_path)
+        c = conn.cursor()
+
+        c.execute('''CREATE TABLE basis_tab(
+                            basis_id INTEGER PRIMARY KEY AUTOINCREMENT,
+                                name text,
+                         description text,
+                                UNIQUE(name)
+                  );''')
+
+        c.execute('''CREATE TABLE data_tab(
+                           basis_id INTEGER,
+                                elt TEXT,
+                               data TEXT,
+                    FOREIGN KEY(basis_id)
+                    REFERENCES basis_tab(basis_id)
+                    );''')
+
+        c.execute('''CREATE TABLE format_tab(format TEXT)''')
+        c.execute('''INSERT INTO format_tab VALUES (?)''', [self.format])
+        conn.commit()
+
+        c.execute(''' CREATE VIEW output_tab AS
+                        SELECT basis_id,
+                               name,
+                               description,
+                               elt,
+                               data
+                        FROM   basis_tab
+                NATURAL JOIN   data_tab
+                    ''')
+
+        import Queue
+        import threading
+
+        num_worker_threads = 7
+        attemps_max = 20
+
+        q_in = Queue.Queue(num_worker_threads)
+        q_out = Queue.Queue(num_worker_threads)
+
+        url = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
+        url += "/eventSubmit_doDownload/true"
+
+        def worker():
+            """Get a job from q_in, download and parse it,
+               then put exactly one result in q_out"""
+            while True:
+                name, path_xml, des, elts = q_in.get()
+
+                # Failure marker: the consumer always receives one item per
+                # job, so it can never wait forever on a job that failed.
+                result = (name, des, None)
+                try:
+                    params = {'bsurl': path_xml, 'bsname': name,
+                              'elts': " ".join(elts),
+                              'format': self.format,
+                              'minimize': self.contraction}
+
+                    for _ in range(attemps_max):
+                        try:
+                            text = requests.get(url, params=params).text
+                            result = self.parser(text, name, des, elts,
+                                                 self.debug)
+                        except Exception:
+                            # Network or parsing trouble: wait and retry
+                            time.sleep(0.1)
+                        else:
+                            break
+                finally:
+                    q_out.put(result)
+                    q_in.task_done()
+
+        def enqueue():
+            for [name, path_xml, des, elts] in dict_basis_list.itervalues():
+                q_in.put([name, path_xml, des, elts])
+
+            return 0
+
+        t = threading.Thread(target=enqueue)
+        t.daemon = True
+        t.start()
+
+        for i in range(num_worker_threads):
+            t = threading.Thread(target=worker)
+            t.daemon = True
+            t.start()
+
+        nb_basis = len(dict_basis_list)
+
+        try:
+            for i in range(nb_basis):
+                name, des, basis_data = q_out.get()
+                q_out.task_done()
+
+                str_indice = '{:>3}'.format(i + 1)
+                str_ = '{0} / {1} | {2}'.format(str_indice, nb_basis, name)
+
+                # The worker gave up on this basis set
+                if basis_data is None:
+                    print str_, "Fail"
+                    continue
+
+                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+                # A d d _ t h e _ b a s i s _ n a m e #
+                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+                try:
+                    cmd = ("INSERT INTO basis_tab(name,description) "
+                           "VALUES (?,?)")
+                    c.execute(cmd, [name, des])
+                    conn.commit()
+                except sqlite3.IntegrityError:
+                    # Without a new row, lastrowid would point to another
+                    # basis set: do not attach the data to it.
+                    print str_, "Fail"
+                    continue
+
+                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+                # A d d _ t h e _ b a s i s _ d a t a #
+                # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+
+                id_ = [c.lastrowid]
+
+                try:
+                    cmd = ("INSERT INTO data_tab(basis_id,elt,data) "
+                           "VALUES (?,?,?)")
+                    c.executemany(cmd, [id_ + list(k) for k in basis_data])
+                    conn.commit()
+                except sqlite3.IntegrityError:
+                    print str_, "Fail"
+                else:
+                    print str_
+        finally:
+            conn.close()
+
+        q_in.join()
+
+    def new_db(self):
+        """Create new_db from scratch"""
+
+        _data = self.dwl_basis_list_raw()
+        array_basis = self.basis_list_raw_to_array(_data)
+
+        self.create_sql(array_basis)
--- a/src/EMSL_local.py
+++ b/src/EMSL_local.py
@ -0,0 +1,317 @@
+# -*- coding: utf-8 -*-
+
+import sqlite3
+import re
+import sys
+import os
+
+
+def checkSQLite3(db_path):
+    """Check that db_path is a usable SQLite3 file and return a working path.
+
+    The path is expanded (~ and environment variables) and made absolute.
+    If a test query fails with sqlite3.OperationalError (the file system
+    may not allow I/O on sqlite3, e.g. lustre), the file is copied to
+    /dev/shm/<pid>.db and the query is tried again on the copy.
+
+    Return (path, changed): the path to use and True when it is the
+    /dev/shm copy (the caller is then expected to remove it).
+
+    Raise IOError when the file is not readable, not a regular file, too
+    small or without the SQLite3 header, or when the copy fails. Any error
+    of the second test query is re-raised after removing the copy.
+    """
+
+    import shutil
+    from os.path import isfile, getsize
+
+    db_path = os.path.expanduser(db_path)
+    db_path = os.path.expandvars(db_path)
+    db_path = os.path.abspath(db_path)
+
+    # Check if db file is readable
+    if not os.access(db_path, os.R_OK):
+        sys.stderr.write("Db file %s is not readable\n" % (db_path))
+        raise IOError
+
+    if not isfile(db_path):
+        sys.stderr.write("Db file %s is not... a file!\n" % (db_path))
+        raise IOError
+
+    if getsize(db_path) < 100:  # SQLite database file header is 100 bytes
+        sys.stderr.write("Db file %s is not a SQLite file!\n" % (db_path))
+        raise IOError
+
+    with open(db_path, 'rb') as fd:
+        header = fd.read(100)
+
+    # The file is read in binary mode: compare with a bytes literal
+    if header[:16] != b'SQLite format 3\x00':
+        sys.stderr.write("Db file %s is not in SQLiteFormat3!\n" % (db_path))
+        raise IOError
+
+    # Check if the file system allows I/O on sqlite3 (lustre)
+    # If not, copy on /dev/shm and remove after opening
+    try:
+        EMSL_local(db_path=db_path).list_basis_available()
+    except sqlite3.OperationalError:
+        sys.stderr.write("I/O Error for you file system\n")
+        sys.stderr.write("Try some fixe\n")
+    else:
+        return db_path, False
+
+    # Copy without going through a shell, so that paths containing spaces
+    # or shell metacharacters work and a failed copy is reported.
+    new_db_path = "/dev/shm/%d.db" % (os.getpid())
+    shutil.copy(db_path, new_db_path)
+
+    # Try again to check
+    try:
+        EMSL_local(db_path=new_db_path).list_basis_available()
+    except BaseException:
+        sys.stderr.write("Sorry...\n")
+        try:
+            os.remove(new_db_path)
+        except OSError:
+            # Same as "rm -f": a missing file is not an error
+            pass
+        raise
+    else:
+        sys.stderr.write("Working !\n")
+        return new_db_path, True
+
+
+def cond_sql_or(table_name, l_value, glob=False):
+    """Build the SQL "OR" condition comparing a column with several values.
+
+    table_name -- name put on the left of each comparison
+    l_value    -- the values; each one gives `table_name <op> "value"`
+    glob       -- use the GLOB operator instead of "="
+
+    Return a list containing one string: the conditions joined by " OR "
+    (an empty string when l_value is empty). The string is not wrapped in
+    parentheses: do it when combining it with AND.
+    """
+
+    opr = "GLOB" if glob else "="
+
+    conditions = []
+    for val in l_value:
+        # Double the embedded double quotes: the value cannot close the
+        # quoted literal and change the meaning of the query
+        quoted = '{}'.format(val).replace('"', '""')
+        conditions.append('{} {} "{}"'.format(table_name, opr, quoted))
+
+    return [" OR ".join(conditions)]
+
+
+def string_to_nb_mo(str_type):
+    """Take a shell type string and return the nb of orbital counted for it.
+
+    "S", "P", "D" and "SP" are read from a fixed table. A single letter
+    between "F" and "W" gives 2 * (ord(letter) - 66) + 1, i.e. 9 for "F",
+    11 for "G", ...
+
+    Raise ValueError for any other string.
+    """
+
+    # NOTE(review): these values follow 2l+1 with l=1 for "S"; confirm that
+    # this is the intended counting convention.
+    d = {"S": 3,
+         "P": 5,
+         "D": 7,
+         "SP": 8}
+
+    if str_type in d:
+        return d[str_type]
+
+    # ord("F") = 70 and ord("W") = 87
+    # The length is tested first: ord() only accepts a single character
+    if len(str_type) == 1 and 70 <= ord(str_type) <= 87:
+        # ord("F") = 70 and l = 4 so the offset is 66
+        return 2 * (ord(str_type) - 66) + 1
+
+    raise ValueError("Unknown orbital type: {0!r}".format(str_type))
+
+#  _       __
+# |_ |\/| (_  |    |   _   _  _. |
+# |_ |  | __) |_   |_ (_) (_ (_| |
+#
+class EMSL_local:
+
+    """
+    All the methods for using the EMSL db locally.
+
+    The database is read through the tables/view `format_tab`, `basis_tab`
+    and `output_tab` (columns basis_id, name, description, elt, data).
+    """
+
+    def __init__(self, db_path=None):
+        """Open the SQLite3 file db_path and read its format name."""
+        self.db_path = db_path
+
+        self.conn = sqlite3.connect(self.db_path)
+        self.c = self.conn.cursor()
+
+        self.c.execute("SELECT * from format_tab")
+        self.format = self.c.fetchone()[0]
+
+    def list_basis_available(self,
+                             elts=None,
+                             basis=None,
+                             average_mo_number=False):
+        """
+        Return all the basis sets which contain all the elements of elts.
+
+        elts  -- element symbols; empty/None means no filter on elements
+        basis -- GLOB patterns on the basis name; a basis set is kept when
+                 it matches at least one of them; empty/None means all
+        average_mo_number -- also compute, for each basis set, the number
+                 of orbitals averaged over the selected elements
+
+        Return a list of (name, description) or, with average_mo_number,
+        a list of [name, description, str(average)].
+        """
+        # If not elts just get the distinct name
+        # Else: 1) fetch all the basis_id which satisfy the condition
+        #       2) If average_mo_number:
+        #            * Get name, description, data
+        #            * Then parse it
+        #          Else Get name, description
+
+        elts = list(elts) if elts else []
+        basis = list(basis) if basis else []
+
+        # ~#~#~#~#~#~ #
+        # F i l t e r #
+        # ~#~#~#~#~#~ #
+
+        if basis:
+            # The parentheses are needed: AND binds tighter than OR
+            cmd_filter_basis = "({0})".format(
+                " OR ".join(["name GLOB ?"] * len(basis)))
+        else:
+            cmd_filter_basis = "(1)"
+
+        # Not Elts
+        if not elts:
+            if not average_mo_number:
+                cmd = """SELECT DISTINCT name, description
+                         FROM basis_tab
+                         WHERE {0}"""
+            else:
+                cmd = """SELECT DISTINCT name, description, data
+                         FROM output_tab
+                         WHERE {0}"""
+
+            cmd = cmd.format(cmd_filter_basis)
+            params = basis
+
+        else:
+
+            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+            # G e t t i n g _ B a s i s I d #
+            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+
+            str_ = """SELECT DISTINCT basis_id
+                      FROM output_tab
+                      WHERE elt=? AND {0}""".format(cmd_filter_basis)
+
+            # One sub-query per element: each one binds the element
+            # followed by all the basis patterns
+            cmd = " INTERSECT ".join([str_] * len(elts)) + ";"
+            params = []
+            for elt in elts:
+                params.append(elt)
+                params.extend(basis)
+
+            self.c.execute(cmd, params)
+
+            l_basis_id = [i[0] for i in self.c.fetchall()]
+
+            # No basis set contains all the elements: an empty list of id
+            # would give an invalid query
+            if not l_basis_id:
+                return []
+
+            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+            # C r e a t e _ t h e _ c m d #
+            # ~#~#~#~#~#~#~#~#~#~#~#~#~#~ #
+
+            column_to_fech = "name, description"
+            if average_mo_number:
+                column_to_fech += ", data"
+
+            # The ids come from the database itself and are written as
+            # integers; the elements are bound as parameters
+            filter_where = " (elt IN ({0})) AND (basis_id IN ({1}))".format(
+                ", ".join("?" * len(elts)),
+                ", ".join(str(int(i)) for i in l_basis_id))
+
+            cmd = """SELECT DISTINCT {0}
+                     FROM output_tab
+                     WHERE {1}
+                     ORDER BY name""".format(column_to_fech, filter_where)
+            params = elts
+
+        # ~#~#~#~#~ #
+        # F e t c h #
+        # ~#~#~#~#~ #
+
+        self.c.execute(cmd, params)
+        info = self.c.fetchall()
+
+        if not average_mo_number:
+            return [i[:] for i in info]
+
+        # ~#~#~#~#~#~#~ #
+        # P a r s i n g #
+        # ~#~#~#~#~#~#~ #
+
+        # Imported only when needed
+        from collections import OrderedDict
+        from src.parser_handler import get_symmetry_function
+
+        # Description : dict_info[name] = [description, nb_mo, nb_ele]
+        dict_info = OrderedDict()
+
+        f_symmetry = get_symmetry_function(self.format)
+
+        for name, description, atom_basis in info:
+
+            nb_mo = 0
+
+            line = atom_basis.split("\n")
+
+            for type_, _, _ in f_symmetry(line):
+                nb_mo += string_to_nb_mo(type_)
+
+            try:
+                dict_info[name][1] += nb_mo
+                dict_info[name][2] += 1.
+            except KeyError:
+                dict_info[name] = [description, nb_mo, 1.]
+
+        # ~#~#~#~#~#~ #
+        # R e t u r n #
+        # ~#~#~#~#~#~ #
+
+        return [[k, v[0], str(v[1] / v[2])] for k, v in dict_info.items()]
+
+    def get_list_element_available(self, basis_name):
+        """
+        Return the elements available for basis_name (case-insensitive).
+
+        basis_name is the name, or a list/tuple containing only the name.
+        """
+
+        # A plain string would be bound character by character
+        if not isinstance(basis_name, (list, tuple)):
+            basis_name = [basis_name]
+
+        # ~#~#~#~#~#~ #
+        # F i l t e r #
+        # ~#~#~#~#~#~ #
+
+        str_ = """SELECT DISTINCT elt
+                  FROM output_tab
+                  WHERE name=(?) COLLATE NOCASE"""
+
+        # ~#~#~#~#~ #
+        # F e t c h #
+        # ~#~#~#~#~ #
+
+        self.c.execute(str_, basis_name)
+
+        # ~#~#~#~#~#~ #
+        # R e t u r n #
+        # ~#~#~#~#~#~ #
+
+        return [str(i[0]) for i in self.c.fetchall()]
+
+    def get_basis(self,
+                  basis_name, elts=None,
+                  handle_l_format=False, check_format=None):
+        """
+        Return the data of the basis set basis_name, one string per atom.
+
+        elts            -- only these elements; empty/None means all
+        handle_l_format -- pass the data through the function returned by
+                           parser_handler.get_handle_l_function
+        check_format    -- name given to check_validity.get_check_function;
+                           the program exits with status 1 when one
+                           function type is refused by the check
+        """
+
+        # ~#~#~#~#~#~ #
+        # F i l t e r #
+        # ~#~#~#~#~#~ #
+
+        params = [basis_name]
+        if elts:
+            # IN (...) keeps the element filter grouped: written as
+            # `name=.. AND elt=.. OR elt=..` the other elements would be
+            # returned for every basis set
+            cmd_filter_ele = "elt IN ({0})".format(
+                ", ".join("?" * len(elts)))
+            params.extend(elts)
+        else:
+            cmd_filter_ele = "(1)"
+
+        self.c.execute('''SELECT DISTINCT data from output_tab
+                     WHERE name=?
+                     AND  {0}'''.format(cmd_filter_ele), params)
+
+        # We need to take i[0] because fetchall return a tuple [(value),...]
+        l_atom_basis = [i[0].strip() for i in self.c.fetchall()]
+
+        # ~#~#~#~#~#~#~#~ #
+        # h a n d l e _ f #
+        # ~#~#~#~#~#~#~#~ #
+        if handle_l_format:
+            from src.parser_handler import get_handle_l_function
+            f = get_handle_l_function(self.format)
+            l_atom_basis = f(l_atom_basis)
+
+        # ~#~#~#~#~ #
+        # C h e c k #
+        # ~#~#~#~#~ #
+
+        if check_format:
+
+            from src.parser_handler import get_symmetry_function
+            from src.parser.check_validity import get_check_function
+
+            f = get_check_function(check_format)
+            f_symmetry = get_symmetry_function(self.format)
+
+            for atom_basis in l_atom_basis:
+                lines = atom_basis.split("\n")
+                for type_, _, _ in f_symmetry(lines):
+                    try:
+                        f(type_)
+                    except AssertionError:
+                        print("False. You have somme special function like SP")
+                        sys.exit(1)
+                    except Exception:
+                        print("Fail !")
+                        sys.exit(1)
+
+        # ~#~#~#~#~#~ #
+        # R e t u r n #
+        # ~#~#~#~#~#~ #
+        return l_atom_basis
+if __name__ == "__main__":
+
+    # Small manual demonstration; it expects a database file "EMSL.db"
+    # in the current directory.
+    e = EMSL_local(db_path="EMSL.db")
+
+    # The method is named list_basis_available
+    l = e.list_basis_available()
+    for i in l:
+        print i
+
+    # The name is given in a list: one bound parameter for the query
+    l = e.get_list_element_available(["pc-0"])
+    print l
+
+    l = e.get_basis("cc-pVTZ", ["H", "He"])
+    for i in l:
+        print i
--- a/src/EMSL_utility.py
+++ b/src/EMSL_utility.py
@ -1,435 +0,0 @@
-# -*- coding: utf-8 -*-
-
-import sqlite3
-import re
-import sys
-import os
-import time
-
-debug = True
-
-elt_path = os.path.dirname(sys.argv[0]) + "/src/elts_abrev.dat"
-
-with open(elt_path, "r") as f:
-    data = f.readlines()
-
-dict_ele = dict()
-for i in data:
-    l = i.split("-")
-    dict_ele[l[1].strip().lower()] = l[2].strip().lower()
-
-
-def install_with_pip(name):
-
-    ins = False
-    d = {'y': True,
-         'n': False}
-
-    while True:
-        choice = raw_input('Do you want to install it ? [Y/N]')
-        try:
-            ins = d[choice.lower()]
-            break
-        except:
-            print "not a valid choice"
-
-    if ins:
-        try:
-            import pip
-            pip.main(['install', name])
-        except:
-            print "You need pip, (http://pip.readthedocs.org/en/latest/installing.html)"
-            sys.exit(1)
-
-
-def cond_sql_or(table_name, l_value):
-
-    l = []
-    dmy = " OR ".join(['%s = "%s"' % (table_name, i) for i in l_value])
-    if dmy:
-        l.append("(%s)" % dmy)
-
-    return l
-
-
-class EMSL_dump:
-
-    def __init__(self, db_path=None, format="GAMESS-US", contraction="True"):
-        self.db_path = db_path
-        self.format = format
-        self.contraction = str(contraction)
-
-        try:
-            import requests
-        except:
-            print "You need the requests package"
-            install_with_pip("requests")
-        finally:
-            self.requests = requests
-
-    def set_db_path(self, path):
-        """Define the database path"""
-        self.db_path = path
-
-    def dwl_basis_list_raw(self):
-        print "Download all the name available in EMSL. It can take some time.",
-        sys.stdout.flush()
-
-        """Download the source code of the iframe who contains the list of the basis set available"""
-
-        url = "https://bse.pnl.gov/bse/portal/user/anon/js_peid/11535052407933/panel/Main/template/content"
-        if debug:
-            import cPickle as pickle
-            dbcache = 'db/cache'
-            if not os.path.isfile(dbcache):
-                page = self.requests.get(url).text
-                file = open(dbcache, 'w')
-                pickle.dump(page, file)
-            else:
-                file = open(dbcache, 'r')
-                page = pickle.load(file)
-            file.close()
-
-        else:
-            page = self.requests.get(url).text
-
-        print "Done"
-        return page
-
-    def bl_raw_to_array(self, data_raw):
-        """Parse the raw html to create a basis set array whith all the info:
-        url, name,description"""
-
-        d = {}
-
-        for line in data_raw.split('\n'):
-            if "new basisSet(" in line:
-                b = line.find("(")
-                e = line.find(");")
-
-                s = line[b + 1:e]
-
-                tup = eval(s)
-                url = tup[0]
-                name = tup[1]
-
-                junkers = re.compile('[[" \]]')
-                elts = junkers.sub('', tup[3]).split(',')
-
-                des = tup[-1]
-
-                if "-ecp" in url.lower():
-                    continue
-                d[name] = [name, url, des, elts]
-
-        """Tric for the unicity of the name"""
-        array = [d[key] for key in d]
-
-        array_sort = sorted(array, key=lambda x: x[0])
-        print len(array_sort), "basisset will be download"
-
-        return array_sort
-
-    def create_url(self, url, name, elts):
-        """Create the adequate url to get the basis data"""
-
-        elts_string = " ".join(elts)
-
-        path = "https://bse.pnl.gov:443/bse/portal/user/anon/js_peid/11535052407933/action/portlets.BasisSetAction/template/courier_content/panel/Main/"
-        path += "/eventSubmit_doDownload/true"
-        path += "?bsurl=" + url
-        path += "&bsname=" + name
-        path += "&elts=" + elts_string
-        path += "&format=" + self.format
-        path += "&minimize=" + self.contraction
-        return path
-
-    def basis_data_row_to_array(self, data, name, des, elts):
-        """Parse the basis data raw html to get a nice tuple"""
-
-        d = []
-
-        b = data.find("$DATA")
-        e = data.find("$END")
-        if (b == -1 or data.find("$DATA$END") != -1):
-            if debug:
-                print data
-            raise Exception("WARNING not DATA")
-        else:
-            data = data.replace("PHOSPHOROUS", "PHOSPHORUS")
-            data = data.replace("D+", "E+")
-            data = data.replace("D-", "E-")
-
-            data = data[b + 5:e - 1].split('\n\n')
-
-            for (elt, data_elt) in zip(elts, data):
-
-                elt_long_th = dict_ele[elt.lower()]
-                elt_long_exp = data_elt.split()[0].lower()
-
-                if "$" in data_elt:
-                    print "Eror",
-                    raise Exception("WARNING not bad split")
-
-                if elt_long_th == elt_long_exp:
-                    d.append((name, des, elt, data_elt.strip()))
-                else:
-                    print "th", elt_long_th
-                    print "exp", elt_long_exp
-                    print "abv", elt
-                    raise Exception("WARNING not good ELEMENT")
-
-        return d
-
-    def create_sql(self, list_basis_array):
-        """Create the sql from the list of basis available data"""
-
-        conn = sqlite3.connect(self.db_path)
-        c = conn.cursor()
-
-        # Create table
-        c.execute('''CREATE TABLE all_value
-                 (name text, description text, elt text, data text)''')
-
-        import Queue
-        import threading
-
-        num_worker_threads = 7
-        attemps_max = 20
-
-        q_in = Queue.Queue(num_worker_threads)
-        q_out = Queue.Queue(num_worker_threads)
-
-        def worker():
-            """get a Job from the q_in, do stuff, when finish put it in the q_out"""
-            while True:
-                [name, url, des, elts] = q_in.get()
-                url = self.create_url(url, name, elts)
-
-                attemps = 0
-                while attemps < attemps_max:
-                    text = self.requests.get(url).text
-                    try:
-                        basis_data = self.basis_data_row_to_array(
-                            text, name, des, elts)
-                        break
-                    except:
-                        time.sleep(0.1)
-                        attemps += 1
-
-                try:
-                    q_out.put(([name, url, des, elts], basis_data))
-                    q_in.task_done()
-                except:
-                    print name, url, des
-                    raise
-
-        def enqueue():
-            for [name, url, des, elts] in list_basis_array:
-                q_in.put(([name, url, des, elts]))
-
-            return 0
-
-        t = threading.Thread(target=enqueue)
-        t.daemon = True
-        t.start()
-
-        for i in range(num_worker_threads):
-            t = threading.Thread(target=worker)
-            t.daemon = True
-            t.start()
-
-        nb_basis = len(list_basis_array)
-
-        for i in range(nb_basis):
-            [name, url, des, elts], basis_data = q_out.get()
-
-            try:
-                c.executemany(
-                    "INSERT INTO all_value VALUES (?,?,?,?)", basis_data)
-                conn.commit()
-
-                print '{:>3}'.format(i + 1), "/", nb_basis, name
-            except:
-                print '{:>3}'.format(i + 1), "/", nb_basis, name, "fail",
-                print '   ', [url, des, elts]
-                raise
-        conn.close()
-
-        q_in.join()
-
-    def new_db(self):
-        """Create new_db from scratch"""
-
-        _data = self.dwl_basis_list_raw()
-        array_basis = self.bl_raw_to_array(_data)
-        del _data
-
-        self.create_sql(array_basis)
-
-
-class EMSL_local:
-
-    def __init__(self, db_path=None):
-        self.db_path = db_path
-
-    def get_list_basis_available(self, elts=[]):
-
-        conn = sqlite3.connect(self.db_path)
-        c = conn.cursor()
-
-        if not elts:
-
-            c.execute("SELECT DISTINCT name,description from all_value")
-            data = c.fetchall()
-
-        else:
-            cmd = [
-                "SELECT name,description FROM all_value WHERE elt=?"] * len(elts)
-            cmd = " INTERSECT ".join(cmd) + ";"
-
-            c.execute(cmd, elts)
-            data = c.fetchall()
-
-        data = [i[:] for i in data]
-
-        conn.close()
-
-        return data
-
-    def get_list_element_available(self, basis_name):
-
-        conn = sqlite3.connect(self.db_path)
-        c = conn.cursor()
-
-        c.execute(
-            "SELECT DISTINCT elt from all_value WHERE name=:name_us COLLATE NOCASE", {
-                "name_us": basis_name})
-
-        data = c.fetchall()
-
-        data = [str(i[0]) for i in data]
-
-        conn.close()
-        return data
-
-    def get_basis(self, basis_name, elts=None, with_l=False):
-
-        def get_list_type(l_line):
-            l = []
-            for i, line in enumerate(l_line):
-
-                m = re.search(p, line)
-                if m:
-                    l.append([m.group(1), i])
-                    try:
-                        l[-2].append(i)
-                    except IndexError:
-                        pass
-
-            l[-1].append(i + 1)
-            return l
-
-        import re
-
-        #  __            _
-        # /__  _ _|_   _|_ ._ _  ._ _     _  _. |
-        # \_| (/_ |_    |  | (_) | | |   _> (_| |
-        #                                     |
-        conn = sqlite3.connect(self.db_path)
-        c = conn.cursor()
-
-        if elts:
-            cmd_ele = "AND " + " ".join(cond_sql_or("elt", elts))
-        else:
-            cmd_ele = ""
-
-        c.execute('''SELECT DISTINCT data from all_value
-                   WHERE name="{basis_name}" COLLATE NOCASE
-                   {cmd_ele}'''.format(basis_name=basis_name,
-                                       cmd_ele=cmd_ele))
-
-        l_data_raw = c.fetchall()
-        conn.close()
-
-        # |_|  _. ._   _| |  _    || | ||
-        # | | (_| | | (_| | (/_      |_
-        #
-
-        p = re.compile(ur'^(\w)\s+\d+\b')
-
-        l_data = []
-
-        for data_raw in l_data_raw:
-
-            basis = data_raw[0].strip()
-
-            l_line_raw = basis.split("\n")
-
-            l_line = [l_line_raw[0]]
-
-            for symmetry, begin, end in get_list_type(l_line_raw):
-
-                if not(with_l) and symmetry in "L":
-
-                    body_s = []
-                    body_p = []
-
-                    for i_l in l_line_raw[begin + 1:end]:
-
-                        a = i_l.split()
-
-                        common = "{:>3}".format(a[0])
-                        common += "{:>15.7f}".format(float(a[1]))
-
-                        tail_s = common + "{:>23.7f}".format(float(a[2]))
-                        body_s.append(tail_s)
-
-                        tail_p = common + "{:>23.7f}".format(float(a[3]))
-                        body_p.append(tail_p)
-
-                    l_line += [l_line_raw[begin].replace("L", "S")]
-                    l_line += body_s
-
-                    l_line += [l_line_raw[begin].replace("L", "P")]
-                    l_line += body_p
-                else:
-                    l_line += l_line_raw[begin:end]
-
-            l_data.append("\n".join(l_line))
-
-        return l_data
-
-
-format_dict = \
-    {
-        "g94": "Gaussian94",
-        "gamess-us": "GAMESS-US",
-        "gamess-uk": "GAMESS-UK",
-        "turbomole": "Turbomole",
-        "tx93": "TX93",
-        "molpro": "Molpro",
-        "molproint": "MolproInt",
-        "hondo": "Hondo",
-        "supermolecule": "SuperMolecule",
-        "molcas": "Molcas",
-        "hyperchem": "HyperChem",
-        "dalton": "Dalton",
-        "demon-ks": "deMon-KS",
-        "demon2k": "deMon2k",
-        "aces2": "AcesII",
-    }
-
-if __name__ == "__main__":
-
-    e = EMSL_local(db_path="EMSL.db")
-    l = e.get_list_basis_available()
-    for i in l:
-        print i
-
-    l = e.get_list_element_available("pc-0")
-    print l
-
-    l = e.get_basis("cc-pVTZ", ["H", "He"])
-    for i in l:
-        print i
--- a/src/misc/init.py
+++ b/src/misc/init.py
--- a/src/misc/docopt.py
+++ b/src/misc/docopt.py
@ -59,12 +59,20 @@ class Pattern(object):
        either = [list(child.children) for child in transform(self).children]
        for case in either:
            for e in [child for child in case if case.count(child) > 1]:
-                if type(e) is Argument or type(e) is Option and e.argcount:
+                if isinstance(
+                        e,
+                        Argument) or isinstance(
+                        e,
+                        Option) and e.argcount:
                    if e.value is None:
                        e.value = []
-                    elif type(e.value) is not list:
+                    elif not isinstance(e.value, list):
                        e.value = e.value.split()
-                if type(e) is Command or type(e) is Option and e.argcount == 0:
+                if isinstance(
+                        e,
+                        Command) or isinstance(
+                        e,
+                        Option) and e.argcount == 0:
                    e.value = 0
        return self

@ -84,10 +92,10 @@ def transform(pattern):
        if any(t in map(type, children) for t in parents):
            child = [c for c in children if type(c) in parents][0]
            children.remove(child)
-            if type(child) is Either:
+            if isinstance(child, Either):
                for c in child.children:
                    groups.append([c] + children)
-            elif type(child) is OneOrMore:
+            elif isinstance(child, OneOrMore):
                groups.append(child.children * 2 + children)
            else:
                groups.append(child.children + children)
@ -117,10 +125,10 @@ class LeafPattern(Pattern):
        left_ = left[:pos] + left[pos + 1:]
        same_name = [a for a in collected if a.name == self.name]
        if type(self.value) in (int, list):
-            if type(self.value) is int:
+            if isinstance(self.value, int):
                increment = 1
            else:
-                increment = ([match.value] if type(match.value) is str
+                increment = ([match.value] if isinstance(match.value, str)
                             else match.value)
            if not same_name:
                match.value = increment
@ -151,7 +159,7 @@ class Argument(LeafPattern):

    def single_match(self, left):
        for n, pattern in enumerate(left):
-            if type(pattern) is Argument:
+            if isinstance(pattern, Argument):
                return n, Argument(self.name, pattern.value)
        return None, None

@ -169,7 +177,7 @@ class Command(Argument):

    def single_match(self, left):
        for n, pattern in enumerate(left):
-            if type(pattern) is Argument:
+            if isinstance(pattern, Argument):
                if pattern.value == self.name:
                    return n, Command(self.name, True)
                else:
--- a/src/misc/elts_abrev.dat
+++ b/src/misc/elts_abrev.dat
--- a/src/parser/init.py
+++ b/src/parser/init.py
--- a/src/parser/check_validity.py
+++ b/src/parser/check_validity.py
@ -0,0 +1,52 @@
+#  _
+# /  |_   _   _ |        _. | o  _| o _|_
+# \_ | | (/_ (_ |<   \/ (_| | | (_| |  |_ \/
+#                                         /
+# Do this after the L special-case treatment.
+
+import sys
+
+
+def check_gamess(str_type):
+    """Check that the orbital type is handled by GAMESS-US.
+
+    str_type -- orbital label, e.g. "S" or "SP"
+
+    Return True for any single-character label.
+    Raise AssertionError for a label that is not exactly one character
+    long; this is how the composite "SP" shell is rejected.
+    """
+
+    # Raised explicitly instead of with `assert`, so that the check is not
+    # stripped when Python runs with -O. Callers catch AssertionError.
+    if len(str_type) != 1:
+        raise AssertionError(
+            "Orbital type {0!r} is not a single letter".format(str_type))
+
+    # Every single-letter orbital type is accepted.
+    return True
+
+
+def check_NWChem(str_type):
+    """Check that the orbital type is handled by NWChem.
+
+    str_type -- orbital label, e.g. "S" or "SP"
+
+    Return True for "S", "P", "D" and for any other single character that
+    does not sort after "I".
+    Raise AssertionError for a label that is not exactly one character
+    long (e.g. the composite "SP" shell).
+    Raise BaseException for a single character that sorts after "I".
+    """
+
+    # Raised explicitly instead of with `assert`, so that the check is not
+    # stripped when Python runs with -O. Callers catch AssertionError.
+    if len(str_type) != 1:
+        raise AssertionError(
+            "Orbital type {0!r} is not a single letter".format(str_type))
+
+    # "S" and "P" sort after "I", so they must be accepted before the
+    # ordering test below.
+    if str_type in ("S", "P", "D"):
+        return True
+
+    # This comparison already covers "K", "L" and "M".
+    if str_type > "I":
+        raise BaseException(
+            "Orbital type {0!r} is not handled by NWChem".format(str_type))
+
+    return True
+
+
+# Program name -> function validating one orbital type for that program.
+# get_check_function() looks names up in this table.
+d_check = {"GAMESS-US": check_gamess,
+           "NWChem": check_NWChem}
+
+
+def get_check_function(name_program):
+    """
+    Return the check function registered in d_check for name_program.
+
+    When the program is unknown, print the available names on stderr and
+    exit with status 1.
+    """
+    if name_program in d_check:
+        return d_check[name_program]
+
+    msg = "You need to add a check funtion for your program {0}"
+    print >> sys.stderr, msg.format(name_program)
+    print >> sys.stderr, "This one are avalaible {0}".format(d_check.keys())
+    sys.exit(1)
--- a/src/parser/gamess_us.py
+++ b/src/parser/gamess_us.py
@ -0,0 +1,138 @@
+#  __
+# /__  _. ._ _   _   _  _        _
+# \_| (_| | | | (/_ _> _>   |_| _>
+#
+
+from src.parser_handler import get_dict_ele
+import re
+
+
+def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
+    """Parse the raw GAMESS-US text of one basis set.
+
+    data  -- raw text; the basis is read between "$DATA" and "$END"
+    name  -- basis set name, returned untouched
+    des   -- basis set description, returned untouched
+    elts  -- element abbreviations, paired in order with the blocks of data
+    debug -- when true, print diagnostics before raising
+
+    Return (name, des, [[elt, data_elt], ...]).
+    Raise Exception when "$DATA" or "$END" is missing, when the section is
+    empty ("$DATA$END"), when a block still contains a "$", or when a block
+    does not start with the long name expected for its element.
+    """
+    b = data.find("$DATA")
+    e = data.find("$END")
+    # A missing "$END" would otherwise be used as the slice bound -1 and
+    # silently truncate the section.
+    if b == -1 or e == -1 or data.find("$DATA$END") != -1:
+        if debug:
+            print data
+        raise Exception("WARNING not DATA")
+
+    # Cut the section out BEFORE rewriting it: the replacements below change
+    # the length of the text, which would invalidate the offsets b and e.
+    data = data[b + 5:e - 1]
+
+    # Spelling fix, then "D" exponent markers rewritten as "E".
+    for old, new in (("PHOSPHOROUS", "PHOSPHORUS"),
+                     ("D+", "E+"),
+                     ("D-", "E-")):
+        data = data.replace(old, new)
+
+    dict_ele = get_dict_ele()
+    basis_data = []
+
+    # Blocks are separated by an empty line and paired positionally with elts.
+    for (elt, data_elt) in zip(elts, data.split('\n\n')):
+
+        # Long name looked up from the abbreviation vs. first word of the block
+        elt_long_th = dict_ele[elt.lower()]
+        elt_long_exp = data_elt.split()[0].lower()
+
+        if "$" in data_elt:
+            if debug:
+                print "Eror",
+            raise Exception("WARNING bad split")
+
+        if elt_long_th != elt_long_exp:
+            if debug:
+                print "th", elt_long_th
+                print "exp", elt_long_exp
+                print "abv", elt
+            raise Exception("WARNING not a good ELEMENT")
+
+        basis_data.append([elt, data_elt.strip()])
+
+    return (name, des, basis_data)
+
+
+# A shell header is one word character followed by a count, e.g. "S   3".
+# Plain raw string: the ur'' prefix is a syntax error on Python 3.
+symmetry_regex = re.compile(r'^(\w)\s+\d+\b')
+
+
+def l_symmetry_gamess_us(atom_basis):
+    """
+    Return the begin and the end of all the type of orbital
+    input: atom_basis = [name, S 1, 12 0.12 12212, ...]
+    output: [ [type, begin, end], ...]
+            (an empty list when no shell header is found)
+    """
+    # Example
+    # [[u'S', 1, 5], [u'SP', 5, 9], [u'SP', 9, 12], [u'D', 16, 18]]
+
+    l = []
+    nb_line = len(atom_basis)
+
+    for i, line in enumerate(atom_basis):
+        # Only lines shorter than 10 characters are tested against the regex.
+        if len(line) >= 10:
+            continue
+
+        m = symmetry_regex.search(line)
+        if not m:
+            continue
+
+        real_symmetry = m.group(1)
+
+        # "L" is either a real L shell or the special SP shell: it is SP
+        # when the next line has 4 columns. The bound check keeps a header
+        # on the very last line from indexing past the end.
+        if (real_symmetry == "L" and i + 1 < nb_line and
+                len(atom_basis[i + 1].split()) == 4):
+            real_symmetry = "SP"
+
+        # The previous shell ends where this one begins.
+        if l:
+            l[-1].append(i)
+        l.append([real_symmetry, i])
+
+    # The last shell runs to the end of the block.
+    if l:
+        l[-1].append(nb_line)
+    return l
+
+
+def handle_l_gamess_us(l_atom_basis):
+    """
+    Rewrite every SP shell of each atom basis as an S shell followed by a
+    P shell; all other shells are copied unchanged.
+    input: l_atom_basis = [multi-line basis text of one atom, ...]
+    output: list of rewritten texts, in the same order
+    """
+
+    def _split_sp(header, body):
+        """Return the lines of the S shell then of the P shell of one SP shell"""
+        s_rows = []
+        p_rows = []
+        for row in body:
+            # Columns 0 and 1 are shared; column 2 goes to S, column 3 to P
+            col = row.split()
+            prefix = "{:>3}".format(col[0])
+            prefix += "{:>15.7f}".format(float(col[1]))
+            s_rows.append(prefix + "{:>23.7f}".format(float(col[2])))
+            p_rows.append(prefix + "{:>23.7f}".format(float(col[3])))
+
+        return ([header.replace("L", "S")] + s_rows +
+                [header.replace("L", "P")] + p_rows)
+
+    l_data = []
+    for atom_basis in l_atom_basis:
+        raw = atom_basis.split("\n")
+
+        # raw[0] holds the name of the atom and is kept as is
+        out = [raw[0]]
+
+        for symmetry, begin, end in l_symmetry_gamess_us(raw):
+            if symmetry == "SP":
+                out.extend(_split_sp(raw[begin], raw[begin + 1:end]))
+            else:
+                out.extend(raw[begin:end])
+
+        l_data.append("\n".join(out))
+
+    return l_data
--- a/src/parser/gaussian94.py
+++ b/src/parser/gaussian94.py
@ -0,0 +1,83 @@
+#  __                            _
+# /__  _.      _  _ o  _. ._    (_| |_|_
+# \_| (_| |_| _> _> | (_| | |     |   |
+#
+import sys
+
+
+def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
+    """Parse the Gaussian94 basis data raw html to get a nice tuple.
+
+    The data-pairs item is a list of 2 item lists:
+    [symbol, data]
+
+    e.g. ["Ca", "Ca     0\\nS   8   1.00\\n..."]
+
+    N.B.: Currently ignores ECP data!
+
+    @param data: raw HTML from BSE
+    @type data : unicode
+    @param name: basis set name
+    @type name : str
+    @param description: basis set description
+    @type description : str
+    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
+    @type elements : list
+    @param debug: print the raw data / duplicate-symbol warnings
+    @type debug : bool
+    @return: (name, description, data-pairs)
+    @rtype : tuple
+    @raise ValueError: when no **** marker is present in data
+    """
+
+    # Each basis set block starts and ends with ****. Find the region
+    # containing all the basis blocks using the first and last ****.
+    mark = "****"
+    begin = data.find(mark)
+    end = data.rfind(mark)
+
+    # rfind() fails exactly when find() does, so one test is enough.
+    if begin == -1:
+        if debug:
+            print(data)
+        str_ = " No basis set data found while attempting to process {0} ({1})"
+        raise ValueError(str_.format(name, description))
+
+    # `end` is already the offset where the last marker STARTS; subtracting
+    # len(mark) again would cut 4 characters of real data from the last block.
+    trimmed = data[begin + len(mark):end].strip()
+    chunks = []
+    lines = []
+
+    # group lines of data delimited by mark into per-element chunks
+    for line in trimmed.split("\n"):
+        if line.startswith(mark):
+            if lines:
+                chunks.append(lines)
+            lines = [line]
+        else:
+            lines.append(line)
+
+    # trailing chunk that is not followed by another marker line; it has
+    # never been appended yet, so no comparison with chunks[-1] is needed
+    if lines:
+        chunks.append(lines)
+
+    # join lines back into solid text blocks, dropping the marker lines
+    chunks = ["\n".join([L for L in c if mark not in L]) for c in chunks]
+
+    # check each block for element and assign symbols to final pairs
+    pairs = []
+    unused_elements = set([e.upper() for e in elements])
+    for chunk in chunks:
+        # nothing between two markers: there is no element to record
+        if not chunk.strip():
+            continue
+
+        # get first 3 chars of first line in block
+        symbol = chunk.split("\n")[0][:3].strip()
+        try:
+            unused_elements.remove(symbol.upper())
+        except KeyError:
+            if debug:
+                msg = "Warning: already processed {0}\n".format(symbol)
+                sys.stderr.write(msg)
+        pairs.append([symbol, chunk])
+
+    if unused_elements:
+        msg = "Warning: elements {0} left over for {1}".format(
+            list(unused_elements),
+            name)
+        print(msg)
+
+    return (name, description, pairs)
--- a/src/parser/nwchem.py
+++ b/src/parser/nwchem.py
@ -0,0 +1,228 @@
+#            _
+# |\ |      /  |_   _  ._ _
+# | \| \/\/ \_ | | (/_ | | |
+#
+import json
+
+
+def extract_basis_nwchem(data, name):
+    """Extract atomic orbital, charge density fitting, or exchange
+    correlation functional basis data from a text region passed in as
+    data. The charge density fitting and exchange correlation functional
+    basis set data are employed for density functional calculations.
+
+    @param data: text region containing basis set data
+    @type data : str
+    @param name: name of basis type: "ao basis", "cd basis", or "xc basis"
+    @type name : str
+    @return: per-element basis set chunks ([] when the section is absent)
+    @rtype : list
+    """
+
+    begin_marker = """BASIS "{0}" PRINT""".format(name)
+    end_marker = "END"
+
+    # search on an upper-cased copy because the original data has
+    # inconsistent capitalization
+    upper = data.upper()
+    begin = upper.find(begin_marker.upper())
+
+    # No basis data found
+    if begin == -1:
+        return []
+
+    start = begin + len(begin_marker)
+    end = upper.find(end_marker, start)
+
+    # No end marker: take everything up to the end of the text instead of
+    # slicing with a negative index
+    if end == -1:
+        end = len(data)
+
+    # `end` is the offset where the end marker STARTS, so the section is
+    # data[start:end]; subtracting len(end_marker) would drop real data
+    trimmed = data[start:end].strip()
+
+    chunks = []
+    lines = []
+
+    # group lines of data delimited by #BASIS SET... into per-element chunks
+    for line in trimmed.split("\n"):
+        if line.upper().startswith("#BASIS SET"):
+            if lines:
+                chunks.append(lines)
+            lines = [line]
+        else:
+            lines.append(line)
+
+    # trailing chunk that is not followed by another #BASIS SET...; it has
+    # never been appended yet, so no comparison with chunks[-1] is needed
+    if lines:
+        chunks.append(lines)
+
+    # join lines back into solid text blocks
+    return ["\n".join(c) for c in chunks]
+
+
+def extract_ecp_nwchem(data):
+    """Extract the effective core potential basis data from a text region
+    passed in as data.
+
+    @param data: text region containing ECP data
+    @type data : str
+    @return: per-element effective core potential chunks ([] when the
+             ECP section or its END marker is absent)
+    @rtype : list
+    """
+
+    ecp_begin_mark = "ECP\n"
+    ecp_end_mark = "END"
+
+    upper = data.upper()
+    ecp_begin = upper.find(ecp_begin_mark)
+
+    # No ECP data, so return empty list
+    if ecp_begin == -1:
+        return []
+
+    start = ecp_begin + len(ecp_begin_mark)
+    ecp_end = upper.find(ecp_end_mark, start)
+    if ecp_end == -1:
+        return []
+
+    # `ecp_end` is the offset where END starts, so the region is
+    # data[start:ecp_end]; subtracting len(ecp_end_mark) would drop real data
+    ecp_region = data[start:ecp_end].strip()
+
+    chunks = []
+    lines = []
+
+    # group lines of data delimited by XX nelec YY into chunks, e.g.
+    # "Zn nelec 18" begins a zinc ECP
+    for line in ecp_region.split("\n"):
+        if " nelec " in line.lower():
+            if lines:
+                chunks.append(lines)
+            lines = [line]
+        else:
+            lines.append(line)
+
+    # trailing chunk that is not followed by another XX nelec YY; it has
+    # never been appended yet, so no comparison with chunks[-1] is needed
+    if lines:
+        chunks.append(lines)
+
+    # join lines back into solid text blocks
+    return ["\n".join(c) for c in chunks]
+
+
+def unpack_nwchem_basis_block(data):
+    """Decode one JSON-serialized NWChem basis data block.
+
+    @param data: JSON text of basis set data, perhaps containing many types
+    @type data : str
+    @return: the decoded object
+    @rtype : dict
+    """
+    return json.loads(data)
+
+
+def parse_basis_data_nwchem(data, name, description, elements, debug=True):
+    """Parse the NWChem basis data raw html into [name, description, pairs].
+
+    Each item of pairs is a 2 item list [symbol, serialized], where
+    serialized is the JSON dump of a dict mapping a section name
+    ("ao basis", "cd basis", "xc basis", "ecp") to the text chunk found
+    for that element in that section.
+
+    @param data: raw HTML from BSE
+    @type data : unicode
+    @param name: basis set name
+    @type name : str
+    @param description: basis set description
+    @type description : str
+    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
+    @type elements : list
+    @param debug: accepted but not read by this function
+    @type debug : bool
+    @return: [name, description, data-pairs]
+    @rtype : list
+    @raise ValueError: when no section holds data, or when no element
+                       symbol can be read from a chunk
+    """
+
+    def extract_symbol(txt):
+        """Return the first token of the first 3 characters of the first
+        non-comment, non-blank line of txt"""
+        for sline in txt.split("\n"):
+            if sline.startswith("#"):
+                continue
+            head = sline[:3].strip().split()
+            if head:
+                return head[0]
+
+        raise ValueError("Can't find element symbol in {0}".format(txt))
+
+    # (section name, chunks) in the order the sections are merged
+    groups = [("ao basis", extract_basis_nwchem(data, "ao basis")),
+              ("cd basis", extract_basis_nwchem(data, "cd basis")),
+              ("xc basis", extract_basis_nwchem(data, "xc basis")),
+              ("ecp", extract_ecp_nwchem(data))]
+
+    if not any([chunks for _, chunks in groups]):
+        str_ = "No basis set data found while attempting to process {0} ({1})"
+        raise ValueError(str_.format(name, description))
+
+    # Tag all used elements, whether from ordinary AO basis or ECP section
+    unused_elements = set([e.upper() for e in elements])
+    for _, chunks in groups:
+        for chunk in chunks:
+            try:
+                unused_elements.remove(extract_symbol(chunk).upper())
+            except KeyError:
+                pass
+
+    if unused_elements:
+        msg = "Warning: elements {0} left over for {1}"
+        print(msg.format(list(unused_elements), name))
+
+    # symbol -> (rank of first appearance, {section name: chunk})
+    packed = {}
+    for gname, chunks in groups:
+        for chunk in chunks:
+            symbol = extract_symbol(chunk)
+
+            if symbol in packed:
+                # Expand entry, e.g. add ecp data for Na after its ao basis
+                idx, known = packed[symbol]
+                known[gname] = chunk
+                chunk_dict = known.copy()
+            else:
+                # Fresh entry, e.g. add Na with its initial ao basis
+                idx = len(packed)
+                chunk_dict = {gname: chunk}
+
+            packed[symbol] = (idx, chunk_dict)
+
+    # Assign (Symbol, Serialized) to final pairs, by rank of first appearance
+    pairs = []
+    for idx, chunk in sorted(packed.values(), key=lambda item: item[0]):
+        symbol = extract_symbol(chunk.get("ao basis")
+                                or chunk.get("cd basis")
+                                or chunk.get("xc basis")
+                                or chunk.get("ecp"))
+        pairs.append([symbol, json.dumps(chunk)])
+
+    return [name, description, pairs]
--- a/src/parser_handler.py
+++ b/src/parser_handler.py
@ -0,0 +1,138 @@
+import sys
+import os
+import re
+
+
+def get_dict_ele():
+    """Return the element lookup table read from src/misc/elts_abrev.dat.
+
+    The data file is located relative to the directory of the running
+    script (sys.argv[0]).  Each non-blank line is split on "-"; the second
+    field becomes the key and the third field the value, both stripped and
+    lower-cased.  The original docstring describes this mapping as
+    dict[atom] = abbreviation; the data file is not visible from here, so
+    confirm against it.
+
+    Raises IOError if the data file cannot be opened and IndexError if a
+    non-blank line has fewer than three "-"-separated fields.
+    """
+    # os.path.join keeps the path relative when the script is started by
+    # bare name (dirname == ""); plain string concatenation would produce
+    # the absolute path "/src/misc/elts_abrev.dat" in that case.
+    elt_path = os.path.join(os.path.dirname(sys.argv[0]),
+                            "src", "misc", "elts_abrev.dat")
+
+    dict_ele = dict()
+    with open(elt_path, "r") as f:
+        for line in f:
+            # Tolerate blank lines, e.g. an empty line at the end of file.
+            if not line.strip():
+                continue
+            fields = line.split("-")
+            dict_ele[fields[1].strip().lower()] = fields[2].strip().lower()
+
+    return dict_ele
+
+# ______                         _         _ _      _
+# |  ___|                       | |       | (_)    | |
+# | |_ _ __ ___  _ __ ___   __ _| |_    __| |_  ___| |_
+# |  _| '__/ _ \| '_ ` _ \ / _` | __|  / _` | |/ __| __|
+# | | | | | (_) | | | | | | (_| | |_  | (_| | | (__| |_
+# \_| |_|  \___/|_| |_| |_|\__,_|\__|  \__,_|_|\___|\__|
+#
+from src.parser.gamess_us import parse_basis_data_gamess_us
+from src.parser.gaussian94 import parse_basis_data_gaussian94
+from src.parser.nwchem import parse_basis_data_nwchem
+
+
+# Registry of known basis-set format names.  The value is the parser function
+# for that format, or None when the format is listed but no parser has been
+# written yet (get_parser_function treats a falsy entry as "unsupported").
+parser_dict = {"Gaussian94": parse_basis_data_gaussian94,
+               "GAMESS-US": parse_basis_data_gamess_us,
+               "NWChem": parse_basis_data_nwchem,
+               "GAMESS-UK": None,
+               "Turbomole": None,
+               "TX93": None,
+               "Molpro": None,
+               "MolproInt": None,
+               "Hondo": None,
+               "SuperMolecule": None,
+               "Molcas": None,
+               "HyperChem": None,
+               "Dalton": None,
+               "deMon-KS": None,
+               "deMon2k": None,
+               "AcesII": None}
+
+
+def check_format(format):
+    """Validate that `format` is a key of parser_dict.
+
+    Returns `format` unchanged when it is known.  Otherwise a message
+    listing the known keys is printed to stderr and the program exits
+    with status 1.
+    """
+    if format in parser_dict:
+        return format
+
+    message = ["This format ({0}) is not available in EMSL".format(format),
+               "EMSL provide this list : {0}".format(parser_dict.keys())]
+    print >> sys.stderr, "\n".join(message)
+    sys.exit(1)
+
+
+def get_parser_function(format):
+    """Return the parser function registered for `format` in parser_dict.
+
+    If the format is registered without a parser (falsy entry), the list of
+    formats that do have a parser is printed to stderr and the program
+    exits with status 1.
+
+    Raises KeyError when `format` is not a key of parser_dict at all; call
+    check_format first to reject unknown names with a readable message.
+    """
+    parser = parser_dict[format]
+    if parser:
+        return parser
+
+    list_parser = [k for k, v in parser_dict.iteritems() if v]
+
+    # The example path below must match the real module location
+    # (src.parser.gamess_us) so contributors can actually find it.
+    str_ = ["We have no parser for this format {0}".format(format),
+            "We only support {0}".format(list_parser),
+            "Feel free to fork / open a pull request",
+            "You just need to add a function like this one:",
+            "'src.parser.gamess_us.parse_basis_data_gamess_us'"]
+    print >> sys.stderr, "\n".join(str_)
+    sys.exit(1)
+
+#  _____                                _                    _ _      _
+# /  ___|                              | |                  | (_)    | |
+# \ `--. _   _ _ __ ___  _ __ ___   ___| |_ _ __ _   _    __| |_  ___| |_
+#  `--. \ | | | '_ ` _ \| '_ ` _ \ / _ \ __| '__| | | |  / _` | |/ __| __|
+# /\__/ / |_| | | | | | | | | | | |  __/ |_| |  | |_| | | (_| | | (__| |_
+# \____/ \__, |_| |_| |_|_| |_| |_|\___|\__|_|   \__, |  \__,_|_|\___|\__|
+#         __/ |                                   __/ |
+#        |___/                                   |___/
+
+"""
+Return the beginning and the end of every type of orbital
+input: atom_basis = [name, S 1, 12 0.12 12212, ...]
+output: [ [type, begin, end], ...]
+"""
+
+from src.parser.gamess_us import l_symmetry_gamess_us
+
+# Registry mapping a format name to its symmetry function; formats missing
+# from this dict make get_symmetry_function exit with an error.
+symmetry_dict = {"GAMESS-US": l_symmetry_gamess_us}
+
+
+def get_symmetry_function(format):
+    """
+    Look up the symmetry function registered for `format` in symmetry_dict.
+
+    According to the original notes, the returned function reports the
+    begin and the end of every type of orbital:
+    input: atom_basis = [name, S 1, 12 0.12 12212, ...]
+    output: [ [type, begin, end], ...]
+
+    When no function is registered for `format`, a hint is printed to
+    stderr and the program exits with status 1.
+    """
+    if format not in symmetry_dict:
+        hint = ["You need to add a function in symmetry_dict",
+                "for your format ({0})".format(format)]
+        print >> sys.stderr, "\n".join(hint)
+        sys.exit(1)
+
+    return symmetry_dict[format]
+
+#  _   _                 _ _        _ _ _    _ _  ______ _      _
+# | | | |               | | |      ( | ) |  ( | ) |  _  (_)    | |
+# | |_| | __ _ _ __   __| | | ___   V V| |   V V  | | | |_  ___| |_
+# |  _  |/ _` | '_ \ / _` | |/ _ \     | |        | | | | |/ __| __|
+# | | | | (_| | | | | (_| | |  __/     | |____    | |/ /| | (__| |_
+# \_| |_/\__,_|_| |_|\__,_|_|\___|     \_____/    |___/ |_|\___|\__|
+
+"""
+Transform the SP special function (created using get_symmetry_function) into S and P
+"""
+from src.parser.gamess_us import handle_l_gamess_us
+
+# Registry mapping a format name to its SP-handling function; formats missing
+# from this dict make get_handle_l_function exit with an error.
+handle_l_dict = {"GAMESS-US": handle_l_gamess_us}
+
+
+def get_handle_l_function(format):
+    """
+    Look up the SP-handling function registered for `format` in
+    handle_l_dict.
+
+    According to the original notes, the returned function transforms the
+    SP special function (created using get_symmetry_function) into S and P.
+
+    When no function is registered for `format`, a hint is printed to
+    stderr and the program exits with status 1.
+    """
+    if format not in handle_l_dict:
+        hint = ["You need to add a function in handle_l_dict",
+                "for your format ({0})".format(format)]
+        print >> sys.stderr, "\n".join(hint)
+        sys.exit(1)
+
+    return handle_l_dict[format]