mirror of
https://github.com/LCPQ/EMSL_Basis_Set_Exchange_Local
synced 2025-01-03 01:55:54 +01:00
Adding parser...
This commit is contained in:
parent
d1f0515ebb
commit
7d146f0e8a
0
src/misc/__init__.py
Normal file
0
src/misc/__init__.py
Normal file
590
src/misc/docopt.py
Normal file
590
src/misc/docopt.py
Normal file
@ -0,0 +1,590 @@
|
||||
"""Pythonic command-line interface parser that will make you smile.
|
||||
|
||||
* http://docopt.org
|
||||
* Repository and issue-tracker: https://github.com/docopt/docopt
|
||||
* Licensed under terms of MIT license (see LICENSE-MIT)
|
||||
* Copyright (c) 2013 Vladimir Keleshev, vladimir@keleshev.com
|
||||
|
||||
"""
|
||||
import sys
|
||||
import re
|
||||
|
||||
|
||||
__all__ = ['docopt']
|
||||
__version__ = '0.6.1'
|
||||
|
||||
|
||||
class DocoptLanguageError(Exception):

    """Raised when the usage message written by the developer is invalid."""

    pass
|
||||
|
||||
|
||||
class DocoptExit(SystemExit):

    """Exit raised when the user invoked the program with bad arguments.

    The exit payload is the optional message followed by the class-level
    ``usage`` text, with surrounding whitespace stripped.
    """

    # Usage text appended to every exit message; assigned by the caller
    # once the usage section of the help text is known.
    usage = ''

    def __init__(self, message=''):
        text = message + '\n' + self.usage
        SystemExit.__init__(self, text.strip())
|
||||
|
||||
|
||||
class Pattern(object):

    """Base class of every node in a usage-pattern tree.

    Two patterns compare (and hash) equal when their ``repr`` strings are
    equal, so subclasses must provide a ``__repr__`` that describes them
    completely.
    """

    def __eq__(self, other):
        return repr(self) == repr(other)

    def __hash__(self):
        return hash(repr(self))

    def fix(self):
        """Normalise the tree in place and return ``self``."""
        self.fix_identities()
        self.fix_repeating_arguments()
        return self

    def fix_identities(self, uniq=None):
        """Make pattern-tree tips point to same object if they are equal.

        A node without a ``children`` attribute is returned unchanged;
        otherwise the children list is rewritten in place.
        """
        if not hasattr(self, 'children'):
            return self
        uniq = list(set(self.flat())) if uniq is None else uniq
        for i, child in enumerate(self.children):
            if not hasattr(child, 'children'):
                assert child in uniq
                # Replace the leaf by the canonical equal object.
                self.children[i] = uniq[uniq.index(child)]
            else:
                child.fix_identities(uniq)

    def fix_repeating_arguments(self):
        """Fix elements that should accumulate/increment values.

        Leaves that occur more than once in one alternative of the
        expanded pattern get a list value (arguments and options taking an
        argument) or a counter starting at 0 (commands and flags).
        """
        either = [list(child.children) for child in transform(self).children]
        for case in either:
            for e in [child for child in case if case.count(child) > 1]:
                # Exact type checks on purpose: Command subclasses
                # Argument, and a repeated Command must become a counter
                # instead of going through ``e.value.split()`` (its value
                # is the boolean False, which has no ``split``).
                if type(e) is Argument or type(e) is Option and e.argcount:
                    if e.value is None:
                        e.value = []
                    elif not isinstance(e.value, list):
                        e.value = e.value.split()
                if type(e) is Command or type(e) is Option and e.argcount == 0:
                    e.value = 0
        return self
|
||||
|
||||
|
||||
def transform(pattern):
    """Expand pattern into an (almost) equivalent one, but with single Either.

    Example: ((-a | -b) (-c | -d)) => (-a -c | -a -d | -b -c | -b -d)
    Quirks: [-a] => (-a), (-a...) => (-a -a)

    """
    branch_types = [Required, Optional, OptionsShortcut, Either, OneOrMore]
    flattened = []
    pending = [[pattern]]
    while pending:
        current = pending.pop(0)
        branches = [node for node in current if type(node) in branch_types]
        if not branches:
            # Only leaves left: this alternative is fully expanded.
            flattened.append(current)
            continue
        branch = branches[0]
        current.remove(branch)
        if isinstance(branch, Either):
            # One new alternative per choice.
            for option in branch.children:
                pending.append([option] + current)
        elif isinstance(branch, OneOrMore):
            # Repetition is approximated by listing the children twice.
            pending.append(branch.children * 2 + current)
        else:
            pending.append(branch.children + current)
    return Either(*[Required(*leaves) for leaves in flattened])
|
||||
|
||||
|
||||
class LeafPattern(Pattern):

    """Leaf/terminal node of a pattern tree."""

    def __init__(self, name, value=None):
        self.name = name
        self.value = value

    def __repr__(self):
        cls_name = self.__class__.__name__
        return '%s(%r, %r)' % (cls_name, self.name, self.value)

    def flat(self, *types):
        """Return ``[self]`` if no types are given or own type is listed."""
        if not types or type(self) in types:
            return [self]
        return []

    def match(self, left, collected=None):
        """Try to consume one element of `left` via ``single_match``.

        Returns ``(matched, remaining, collected)``.  When this leaf's
        value is an int or a list, repeated matches with the same name are
        folded into the already collected element (count or list).
        """
        if collected is None:
            collected = []
        index, found = self.single_match(left)
        if found is None:
            return False, left, collected
        remaining = left[:index] + left[index + 1:]
        previous = [item for item in collected if item.name == self.name]
        if type(self.value) not in (int, list):
            return True, remaining, collected + [found]
        if isinstance(self.value, int):
            step = 1
        elif isinstance(found.value, str):
            step = [found.value]
        else:
            step = found.value
        if previous:
            previous[0].value += step
            return True, remaining, collected
        found.value = step
        return True, remaining, collected + [found]
|
||||
|
||||
|
||||
class BranchPattern(Pattern):

    """Branch/inner node of a pattern tree."""

    def __init__(self, *children):
        self.children = list(children)

    def __repr__(self):
        inner = ', '.join([repr(child) for child in self.children])
        return '%s(%s)' % (self.__class__.__name__, inner)

    def flat(self, *types):
        """Return the matching nodes below (or including) this branch.

        If this node's own type is in `types` it is returned alone;
        otherwise the results of the children are concatenated in order.
        """
        if type(self) in types:
            return [self]
        found = []
        for child in self.children:
            found.extend(child.flat(*types))
        return found
|
||||
|
||||
|
||||
class Argument(LeafPattern):

    """Leaf for a positional argument."""

    def single_match(self, left):
        """Return ``(index, match)`` for the first Argument in `left`.

        The match is a new Argument with this pattern's name and the value
        of the element found; ``(None, None)`` if there is no Argument.
        """
        for n, pattern in enumerate(left):
            if isinstance(pattern, Argument):
                return n, Argument(self.name, pattern.value)
        return None, None

    @classmethod
    def parse(class_, source):
        """Build an instance from text containing ``<name>``.

        An optional ``[default: ...]`` (case-insensitive) supplies the
        value.  Raises IndexError when `source` has no ``<...>`` token.
        """
        # Raw strings: '\S' and '\[' are regex escapes, not string escapes.
        name = re.findall(r'(<\S*?>)', source)[0]
        value = re.findall(r'\[default: (.*)\]', source, flags=re.I)
        return class_(name, value[0] if value else None)
|
||||
|
||||
|
||||
class Command(Argument):

    """Leaf for a literal command word; its value defaults to False."""

    def __init__(self, name, value=False):
        self.name = name
        self.value = value

    def single_match(self, left):
        """Match only if the first Argument in `left` equals the name.

        Returns ``(index, Command(name, True))`` on success and
        ``(None, None)`` otherwise.
        """
        first = next(((index, item) for index, item in enumerate(left)
                      if isinstance(item, Argument)), None)
        if first is not None and first[1].value == self.name:
            return first[0], Command(self.name, True)
        return None, None
|
||||
|
||||
|
||||
class Option(LeafPattern):

    """Leaf for an option with a short and/or long name."""

    def __init__(self, short=None, long=None, argcount=0, value=False):
        assert argcount in (0, 1)
        self.short, self.long, self.argcount = short, long, argcount
        # An option that takes an argument has no value until one is given.
        self.value = None if value is False and argcount else value

    @classmethod
    def parse(class_, option_description):
        """Build an Option from one entry of the options section.

        The entry is the option spelling(s), then at least two spaces,
        then the free-text description, which may carry a
        ``[default: ...]`` value for options taking an argument.
        """
        short, long, argcount, value = None, None, 0, False
        # Split on TWO spaces: a single space also separates "-h, --help"
        # and "-o FILE", which belong to the option part.
        options, _, description = option_description.strip().partition('  ')
        options = options.replace(',', ' ').replace('=', ' ')
        for s in options.split():
            if s.startswith('--'):
                long = s
            elif s.startswith('-'):
                short = s
            else:
                # Anything that is not an option name is its argument.
                argcount = 1
        if argcount:
            matched = re.findall(r'\[default: (.*)\]', description,
                                 flags=re.I)
            value = matched[0] if matched else None
        return class_(short, long, argcount, value)

    def single_match(self, left):
        """Return ``(index, element)`` of the first same-named element."""
        for n, pattern in enumerate(left):
            if self.name == pattern.name:
                return n, pattern
        return None, None

    @property
    def name(self):
        """The long name if there is one, otherwise the short name."""
        return self.long or self.short

    def __repr__(self):
        return 'Option(%r, %r, %r, %r)' % (self.short, self.long,
                                           self.argcount, self.value)
|
||||
|
||||
|
||||
class Required(BranchPattern):

    """Branch whose children must all match, in order."""

    def match(self, left, collected=None):
        """Return ``(True, remaining, collected)`` if every child matches.

        On the first failing child the original `left` and `collected`
        are returned together with ``False``.
        """
        if collected is None:
            collected = []
        remaining, gathered = left, collected
        for child in self.children:
            ok, remaining, gathered = child.match(remaining, gathered)
            if not ok:
                return False, left, collected
        return True, remaining, gathered
|
||||
|
||||
|
||||
class Optional(BranchPattern):

    """Branch that always succeeds, consuming whatever its children match."""

    def match(self, left, collected=None):
        if collected is None:
            collected = []
        for child in self.children:
            # The child's success flag is deliberately ignored.
            left, collected = child.match(left, collected)[1:]
        return True, left, collected
|
||||
|
||||
|
||||
class OptionsShortcut(Optional):

    """Placeholder node standing for the ``[options]`` shortcut."""

    pass
|
||||
|
||||
|
||||
class OneOrMore(BranchPattern):

    """Branch matching its single child one or more times."""

    def match(self, left, collected=None):
        """Apply the child repeatedly until it fails or stops consuming.

        Succeeds if the child matched at least once; otherwise the
        original `left` and `collected` are returned with ``False``.
        """
        assert len(self.children) == 1
        if collected is None:
            collected = []
        child = self.children[0]
        remaining, gathered = left, collected
        previous = None
        hits = 0
        while True:
            ok, remaining, gathered = child.match(remaining, gathered)
            if ok:
                hits += 1
            if previous == remaining:
                # Nothing was consumed since the last round: stop.
                break
            previous = remaining
            if not ok:
                break
        if hits >= 1:
            return True, remaining, gathered
        return False, left, collected
|
||||
|
||||
|
||||
class Either(BranchPattern):

    """Branch matching exactly one of its alternatives."""

    def match(self, left, collected=None):
        """Try every child and keep the success leaving the fewest items.

        Ties go to the earliest child.  If no child matches, the original
        `left` and `collected` are returned with ``False``.
        """
        if collected is None:
            collected = []
        attempts = [child.match(left, collected) for child in self.children]
        successes = [attempt for attempt in attempts if attempt[0]]
        if not successes:
            return False, left, collected
        return min(successes, key=lambda attempt: len(attempt[1]))
|
||||
|
||||
|
||||
class Tokens(list):

    """List of tokens plus the exception class used to report errors."""

    def __init__(self, source, error=DocoptExit):
        # Anything with a ``split`` method (a string) is split on
        # whitespace; other iterables are taken as they are.
        self += source.split() if hasattr(source, 'split') else source
        self.error = error

    @staticmethod
    def from_pattern(source):
        """Tokenise a usage pattern; errors are DocoptLanguageError."""
        # Put spaces around brackets, '|' and '...' so they split apart.
        source = re.sub(r'([\[\]\(\)\|]|\.\.\.)', r' \1 ', source)
        # Keep '<...>' groups (which may contain spaces) as one token.
        source = [s for s in re.split(r'\s+|(\S*<.*?>)', source) if s]
        return Tokens(source, error=DocoptLanguageError)

    def move(self):
        """Remove and return the first token, or None when empty."""
        return self.pop(0) if len(self) else None

    def current(self):
        """Return the first token without removing it, or None."""
        return self[0] if len(self) else None
|
||||
|
||||
|
||||
def parse_long(tokens, options):
    """long ::= '--' chars [ ( ' ' | '=' ) chars ] ;

    Consumes one long option (and its argument, if any) from `tokens` and
    returns it in a one-element list.  Unknown options are appended to
    `options`.  Problems are reported with ``tokens.error``.
    """
    name, separator, argument = tokens.move().partition('=')
    assert name.startswith('--')
    if separator == '' and argument == '':
        argument = None
    # True when parsing the user's argv rather than the usage pattern.
    from_argv = tokens.error is DocoptExit

    candidates = [opt for opt in options if opt.long == name]
    if from_argv and not candidates:
        # No exact match: accept an unambiguous prefix.
        candidates = [opt for opt in options
                      if opt.long and opt.long.startswith(name)]
    if len(candidates) > 1:
        raise tokens.error('%s is not a unique prefix: %s?' %
                           (name, ', '.join(opt.long for opt in candidates)))

    if not candidates:
        takes_value = 1 if separator == '=' else 0
        option = Option(None, name, takes_value)
        options.append(option)
        if from_argv:
            option = Option(None, name, takes_value,
                            argument if takes_value else True)
        return [option]

    known = candidates[0]
    option = Option(known.short, known.long, known.argcount, known.value)
    if option.argcount == 0:
        if argument is not None:
            raise tokens.error('%s must not have an argument' % option.long)
    elif argument is None:
        if tokens.current() in [None, '--']:
            raise tokens.error('%s requires argument' % option.long)
        argument = tokens.move()
    if from_argv:
        option.value = argument if argument is not None else True
    return [option]
|
||||
|
||||
|
||||
def parse_shorts(tokens, options):
    """shorts ::= '-' ( chars )* [ [ ' ' ] chars ] ;

    Consumes one token of stacked short options and returns the list of
    options it contains.  Unknown options are appended to `options`.
    """
    token = tokens.move()
    assert token.startswith('-') and not token.startswith('--')
    # True when parsing the user's argv rather than the usage pattern.
    from_argv = tokens.error is DocoptExit
    rest = token.lstrip('-')
    found = []
    while rest != '':
        flag = '-' + rest[0]
        rest = rest[1:]
        candidates = [opt for opt in options if opt.short == flag]
        if len(candidates) > 1:
            raise tokens.error('%s is specified ambiguously %d times' %
                               (flag, len(candidates)))
        if not candidates:
            option = Option(flag, None, 0)
            options.append(option)
            if from_argv:
                option = Option(flag, None, 0, True)
        else:
            known = candidates[0]
            option = Option(flag, known.long, known.argcount, known.value)
            argument = None
            if option.argcount != 0:
                if rest == '':
                    if tokens.current() in [None, '--']:
                        raise tokens.error('%s requires argument' % flag)
                    argument = tokens.move()
                else:
                    # The remainder of the token is the argument.
                    argument = rest
                    rest = ''
            if from_argv:
                option.value = argument if argument is not None else True
        found.append(option)
    return found
|
||||
|
||||
|
||||
def parse_pattern(source, options):
    """Parse a formal usage string into a ``Required`` pattern tree.

    Raises the tokens' error class if tokens are left over after parsing.
    """
    tokens = Tokens.from_pattern(source)
    parsed = parse_expr(tokens, options)
    if tokens.current() is None:
        return Required(*parsed)
    raise tokens.error('unexpected ending: %r' % ' '.join(tokens))
|
||||
|
||||
|
||||
def parse_expr(tokens, options):
    """expr ::= seq ( '|' seq )* ;"""

    def as_alternative(sequence):
        # A multi-element sequence is grouped so it counts as one choice.
        return [Required(*sequence)] if len(sequence) > 1 else sequence

    first = parse_seq(tokens, options)
    if tokens.current() != '|':
        return first
    alternatives = as_alternative(first)
    while tokens.current() == '|':
        tokens.move()
        alternatives += as_alternative(parse_seq(tokens, options))
    if len(alternatives) > 1:
        return [Either(*alternatives)]
    return alternatives
|
||||
|
||||
|
||||
def parse_seq(tokens, options):
    """seq ::= ( atom [ '...' ] )* ;"""
    terminators = [None, ']', ')', '|']
    sequence = []
    while tokens.current() not in terminators:
        atom = parse_atom(tokens, options)
        if tokens.current() == '...':
            tokens.move()
            atom = [OneOrMore(*atom)]
        sequence += atom
    return sequence
|
||||
|
||||
|
||||
def parse_atom(tokens, options):
    """atom ::= '(' expr ')' | '[' expr ']' | 'options'
             | long | shorts | argument | command ;
    """
    token = tokens.current()
    if token in '([':
        tokens.move()
        brackets = {'(': (')', Required), '[': (']', Optional)}
        closing, wrapper = brackets[token]
        group = wrapper(*parse_expr(tokens, options))
        if tokens.move() != closing:
            raise tokens.error("unmatched '%s'" % token)
        return [group]
    if token == 'options':
        tokens.move()
        return [OptionsShortcut()]
    if token.startswith('--') and token != '--':
        return parse_long(tokens, options)
    if token.startswith('-') and token not in ('-', '--'):
        return parse_shorts(tokens, options)
    if token.startswith('<') and token.endswith('>') or token.isupper():
        return [Argument(tokens.move())]
    # Anything else is a literal command word.
    return [Command(tokens.move())]
|
||||
|
||||
|
||||
def parse_argv(tokens, options, options_first=False):
    """Parse command-line argument vector.

    If options_first:
        argv ::= [ long | shorts ]* [ argument ]* [ '--' [ argument ]* ] ;
    else:
        argv ::= [ long | shorts | argument ]* [ '--' [ argument ]* ] ;

    """
    parsed = []
    while tokens.current() is not None:
        head = tokens.current()
        if head == '--':
            # Everything from '--' on (itself included) is positional.
            break
        if head.startswith('--'):
            parsed += parse_long(tokens, options)
        elif head.startswith('-') and head != '-':
            parsed += parse_shorts(tokens, options)
        elif options_first:
            # First positional seen: the rest is positional as well.
            break
        else:
            parsed.append(Argument(None, tokens.move()))
    return parsed + [Argument(None, value) for value in tokens]
|
||||
|
||||
|
||||
def parse_defaults(doc):
    """Return the Option objects described in the "options:" sections.

    Every section found by ``parse_section('options:', doc)`` is split
    into entries that start with ``-`` at the beginning of a line, and
    each entry is handed to ``Option.parse``.
    """
    defaults = []
    for s in parse_section('options:', doc):
        # FIXME corner case "bla: options: --foo"
        _, _, s = s.partition(':')  # get rid of "options:"
        # The capture group keeps each leading "-x" in the split result,
        # so it can be glued back to the text that follows it.
        split = re.split(r'\n[ \t]*(-\S+?)', '\n' + s)[1:]
        split = [s1 + s2 for s1, s2 in zip(split[::2], split[1::2])]
        defaults += [Option.parse(s) for s in split if s.startswith('-')]
    return defaults
|
||||
|
||||
|
||||
def parse_section(name, source):
    """Return every section of `source` whose first line contains `name`.

    A section is the line containing `name` (case-insensitive) plus all
    directly following lines that start with a space or a tab.  Each
    section is returned stripped of surrounding whitespace.  `name` is
    inserted into the regular expression unescaped.
    """
    # Raw strings so that regex escapes are not treated as string escapes.
    pattern = re.compile(
        r'^([^\n]*' + name + r'[^\n]*\n?(?:[ \t].*?(?:\n|$))*)',
        re.IGNORECASE | re.MULTILINE)
    return [s.strip() for s in pattern.findall(source)]
|
||||
|
||||
|
||||
def formal_usage(section):
    """Turn a usage section into one parenthesised pattern string.

    The text after the first ':' is split on whitespace; the first word
    is dropped and each later occurrence of it becomes ``) | (``.
    """
    words = section.partition(':')[2].split()  # drop "usage:"
    pieces = []
    for word in words[1:]:
        pieces.append(') | (' if word == words[0] else word)
    return '( ' + ' '.join(pieces) + ' )'
|
||||
|
||||
|
||||
def extras(help, version, options, doc):
    """Handle the built-in help and version options.

    If `help` is true and ``-h``/``--help`` was given with a true value,
    print `doc` (without leading/trailing newlines) and exit.  If
    `version` is true and ``--version`` was given, print `version` and
    exit.  Otherwise return None.
    """

    def given(names):
        for option in options:
            if option.name in names and option.value:
                return True
        return False

    if help and given(('-h', '--help')):
        print(doc.strip("\n"))
        sys.exit()
    if version and given(('--version',)):
        print(version)
        sys.exit()
|
||||
|
||||
|
||||
class Dict(dict):

    """dict whose repr lists the items sorted, one per line."""

    def __repr__(self):
        rows = ['%r: %r' % (key, value)
                for key, value in sorted(self.items())]
        return '{' + ',\n '.join(rows) + '}'
|
||||
|
||||
|
||||
def docopt(doc, argv=None, help=True, version=None, options_first=False):
    """Parse `argv` based on command-line interface described in `doc`.

    `docopt` creates your command-line interface based on its
    description that you pass as `doc`. Such description can contain
    --options, <positional-argument>, commands, which could be
    [optional], (required), (mutually | exclusive) or repeated...

    Parameters
    ----------
    doc : str
        Description of your command-line interface.
    argv : list of str, optional
        Argument vector to be parsed. sys.argv[1:] is used if not
        provided.
    help : bool (default: True)
        Set to False to disable automatic help on -h or --help
        options.
    version : any object
        If passed, the object will be printed if --version is in
        `argv`.
    options_first : bool (default: False)
        Set to True to require options precede positional arguments,
        i.e. to forbid options and positional arguments intermix.

    Returns
    -------
    args : dict
        A dictionary, where keys are names of command-line elements
        such as e.g. "--verbose" and "<path>", and values are the
        parsed values of those elements.

    Raises
    ------
    DocoptLanguageError
        If `doc` has no "usage:" section or more than one.
    DocoptExit
        If `argv` does not match the usage pattern.

    Example
    -------
    >>> from docopt import docopt
    >>> doc = '''
    ... Usage:
    ...     my_program tcp <host> <port> [--timeout=<seconds>]
    ...     my_program serial <port> [--baud=<n>] [--timeout=<seconds>]
    ...     my_program (-h | --help | --version)
    ...
    ... Options:
    ...     -h, --help  Show this screen and exit.
    ...     --baud=<n>  Baudrate [default: 9600]
    ... '''
    >>> argv = ['tcp', '127.0.0.1', '80', '--timeout', '30']
    >>> docopt(doc, argv)
    {'--baud': '9600',
     '--help': False,
     '--timeout': '30',
     '--version': False,
     '<host>': '127.0.0.1',
     '<port>': '80',
     'serial': False,
     'tcp': True}

    See also
    --------
    * For video introduction see http://docopt.org
    * Full documentation is available in README.rst as well as online
      at https://github.com/docopt/docopt#readme

    """
    if argv is None:
        argv = sys.argv[1:]

    usage_sections = parse_section('usage:', doc)
    if len(usage_sections) == 0:
        raise DocoptLanguageError('"usage:" (case-insensitive) not found.')
    if len(usage_sections) > 1:
        raise DocoptLanguageError('More than one "usage:" (case-insensitive).')
    # Stored on the class so every DocoptExit raised later shows the usage.
    DocoptExit.usage = usage_sections[0]

    options = parse_defaults(doc)
    pattern = parse_pattern(formal_usage(DocoptExit.usage), options)
    # A copy of `options` is passed so argv parsing cannot extend it.
    argv = parse_argv(Tokens(argv), list(options), options_first)

    # "[options]" stands for every documented option that is not already
    # written out explicitly in the usage pattern.
    explicit_options = set(pattern.flat(Option))
    for shortcut in pattern.flat(OptionsShortcut):
        documented = parse_defaults(doc)
        shortcut.children = list(set(documented) - explicit_options)

    extras(help, version, argv, doc)
    matched, left, collected = pattern.fix().match(argv)
    if not (matched and left == []):  # better error message if left?
        raise DocoptExit()
    return Dict((a.name, a.value) for a in (pattern.flat() + collected))
|
118
src/misc/elts_abrev.dat
Normal file
118
src/misc/elts_abrev.dat
Normal file
@ -0,0 +1,118 @@
|
||||
1 - H - Hydrogen
|
||||
2 - He - Helium
|
||||
3 - Li - Lithium
|
||||
4 - Be - Beryllium
|
||||
5 - B - Boron
|
||||
6 - C - Carbon
|
||||
7 - N - Nitrogen
|
||||
8 - O - Oxygen
|
||||
9 - F - Fluorine
|
||||
10 - Ne - Neon
|
||||
11 - Na - Sodium
|
||||
12 - Mg - Magnesium
|
||||
13 - Al - Aluminum
|
||||
14 - Si - Silicon
|
||||
15 - P - Phosphorus
|
||||
16 - S - Sulfur
|
||||
17 - Cl - Chlorine
|
||||
18 - Ar - Argon
|
||||
19 - K - Potassium
|
||||
20 - Ca - Calcium
|
||||
21 - Sc - Scandium
|
||||
22 - Ti - Titanium
|
||||
23 - V - Vanadium
|
||||
24 - Cr - Chromium
|
||||
25 - Mn - Manganese
|
||||
26 - Fe - Iron
|
||||
27 - Co - Cobalt
|
||||
28 - Ni - Nickel
|
||||
29 - Cu - Copper
|
||||
30 - Zn - Zinc
|
||||
31 - Ga - Gallium
|
||||
32 - Ge - Germanium
|
||||
33 - As - Arsenic
|
||||
34 - Se - Selenium
|
||||
35 - Br - Bromine
|
||||
36 - Kr - Krypton
|
||||
37 - Rb - Rubidium
|
||||
38 - Sr - Strontium
|
||||
39 - Y - Yttrium
|
||||
40 - Zr - Zirconium
|
||||
41 - Nb - Niobium
|
||||
42 - Mo - Molybdenum
|
||||
43 - Tc - Technetium
|
||||
44 - Ru - Ruthenium
|
||||
45 - Rh - Rhodium
|
||||
46 - Pd - Palladium
|
||||
47 - Ag - Silver
|
||||
48 - Cd - Cadmium
|
||||
49 - In - Indium
|
||||
50 - Sn - Tin
|
||||
51 - Sb - Antimony
|
||||
52 - Te - Tellurium
|
||||
53 - I - Iodine
|
||||
54 - Xe - Xenon
|
||||
55 - Cs - Cesium
|
||||
56 - Ba - Barium
|
||||
57 - La - Lanthanum
|
||||
58 - Ce - Cerium
|
||||
59 - Pr - Praseodymium
|
||||
60 - Nd - Neodymium
|
||||
61 - Pm - Promethium
|
||||
62 - Sm - Samarium
|
||||
63 - Eu - Europium
|
||||
64 - Gd - Gadolinium
|
||||
65 - Tb - Terbium
|
||||
66 - Dy - Dysprosium
|
||||
67 - Ho - Holmium
|
||||
68 - Er - Erbium
|
||||
69 - Tm - Thulium
|
||||
70 - Yb - Ytterbium
|
||||
71 - Lu - Lutetium
|
||||
72 - Hf - Hafnium
|
||||
73 - Ta - Tantalum
|
||||
74 - W - Tungsten
|
||||
75 - Re - Rhenium
|
||||
76 - Os - Osmium
|
||||
77 - Ir - Iridium
|
||||
78 - Pt - Platinum
|
||||
79 - Au - Gold
|
||||
80 - Hg - Mercury
|
||||
81 - Tl - Thallium
|
||||
82 - Pb - Lead
|
||||
83 - Bi - Bismuth
|
||||
84 - Po - Polonium
|
||||
85 - At - Astatine
|
||||
86 - Rn - Radon
|
||||
87 - Fr - Francium
|
||||
88 - Ra - Radium
|
||||
89 - Ac - Actinium
|
||||
90 - Th - Thorium
|
||||
91 - Pa - Protactinium
|
||||
92 - U - Uranium
|
||||
93 - Np - Neptunium
|
||||
94 - Pu - Plutonium
|
||||
95 - Am - Americium
|
||||
96 - Cm - Curium
|
||||
97 - Bk - Berkelium
|
||||
98 - Cf - Californium
|
||||
99 - Es - Einsteinium
|
||||
100 - Fm - Fermium
|
||||
101 - Md - Mendelevium
|
||||
102 - No - Nobelium
|
||||
103 - Lr - Lawrencium
|
||||
104 - Rf - Rutherfordium
|
||||
105 - Db - Dubnium
|
||||
106 - Sg - Seaborgium
|
||||
107 - Bh - Bohrium
|
||||
108 - Hs - Hassium
|
||||
109 - Mt - Meitnerium
|
||||
110 - Ds - Darmstadtium
|
||||
111 - Rg - Roentgenium
|
||||
112 - Cn - Copernicium
|
||||
113 - Uut - Ununtrium
|
||||
114 - Fl - Flerovium
|
||||
115 - Uup - Ununpentium
|
||||
116 - Lv - Livermorium
|
||||
117 - Uus - Ununseptium
|
||||
118 - Uuo - Ununoctium
|
0
src/parser/__init__.py
Normal file
0
src/parser/__init__.py
Normal file
52
src/parser/check_validity.py
Normal file
52
src/parser/check_validity.py
Normal file
@ -0,0 +1,52 @@
|
||||
# _
|
||||
# / |_ _ _ | _. | o _| o _|_
|
||||
# \_ | | (/_ (_ |< \/ (_| | | (_| | |_ \/
|
||||
# /
|
||||
# Do this after the L special-case treatment.
|
||||
|
||||
import sys
|
||||
|
||||
|
||||
def check_gamess(str_type):
    """Check whether the orbital type is handled by GAMESS-US.

    Returns True for every single-character type.  The combined "SP"
    type raises BaseException.
    """
    # NOTE(review): with assertions enabled a two-character "SP" is
    # rejected here first, so the "SP" test below is only reached when
    # assertions are stripped (python -O).
    assert len(str_type) == 1

    if str_type not in ["S", "P", "D"] and str_type == "SP":
        raise BaseException
    return True
|
||||
|
||||
|
||||
def check_NWChem(str_type):
    """Check whether the orbital type is handled by NWChem.

    "S", "P" and "D" are accepted.  Any other type that sorts after "I"
    (or is one of "K", "L", "M") raises BaseException; the remaining
    types return True.
    """
    assert len(str_type) == 1

    if str_type in ["S", "P", "D"]:
        return True
    if str_type > "I" or str_type in ["K", "L", "M"]:
        raise BaseException
    return True
|
||||
|
||||
|
||||
# Dispatch table: program name -> function validating an orbital type.
d_check = {
    "GAMESS-US": check_gamess,
    "NWChem": check_NWChem,
}
|
||||
|
||||
|
||||
def get_check_function(name_program):
    """Return the orbital-type check function registered for a program.

    The function is looked up in ``d_check``.  For an unknown program
    name, a message listing the known names is written to stderr and the
    process exits with status 1.
    """
    try:
        f = d_check[name_program]
    except KeyError:
        # sys.stderr.write works on Python 2 and 3 alike, unlike the
        # "print >> sys.stderr" statement.
        str_ = "You need to add a check funtion for your program {0}"
        sys.stderr.write(str_.format(name_program) + "\n")
        sys.stderr.write(
            "This one are avalaible {0}".format(list(d_check.keys())) + "\n")
        sys.exit(1)
    return f
|
138
src/parser/gamess_us.py
Normal file
138
src/parser/gamess_us.py
Normal file
@ -0,0 +1,138 @@
|
||||
# __
|
||||
# /__ _. ._ _ _ _ _ _
|
||||
# \_| (_| | | | (/_ _> _> |_| _>
|
||||
#
|
||||
|
||||
from src.parser_handler import get_dict_ele
|
||||
import re
|
||||
|
||||
|
||||
def parse_basis_data_gamess_us(data, name, des, elts, debug=False):
    """Parse the basis data raw html of gamess-us to get a nice tuple.

    The text between "$DATA" and "$END" is split on blank lines and the
    pieces are paired, in order, with the symbols in `elts`.  The first
    word of each piece must be the element name that ``get_dict_ele()``
    gives for the symbol.

    Return (name, description, [[ele, data_ele],...])

    Raises Exception when there is no (or an empty) "$DATA" section, when
    a piece still contains "$", or when a piece's element name does not
    match.  With `debug` true, details are printed before raising.
    """
    basis_data = []

    b = data.find("$DATA")
    e = data.find("$END")
    if b == -1 or data.find("$DATA$END") != -1:
        if debug:
            print(data)
        raise Exception("WARNING not DATA")

    # Slice BEFORE replacing: "PHOSPHOROUS" -> "PHOSPHORUS" shortens the
    # text, so `e` would be stale if the replacement ran first.
    body = data[b + 5:e - 1]

    dict_replace = {"PHOSPHOROUS": "PHOSPHORUS",
                    "D+": "E+",
                    "D-": "E-"}
    for k, v in dict_replace.items():
        body = body.replace(k, v)

    dict_ele = get_dict_ele()

    for (elt, data_elt) in zip(elts, body.split('\n\n')):

        elt_long_th = dict_ele[elt.lower()]
        elt_long_exp = data_elt.split()[0].lower()

        if "$" in data_elt:
            if debug:
                print("Eror")
            raise Exception("WARNING bad split")

        if elt_long_th == elt_long_exp:
            basis_data.append([elt, data_elt.strip()])
        else:
            if debug:
                print("th {0}".format(elt_long_th))
                print("exp {0}".format(elt_long_exp))
                print("abv {0}".format(elt))
            raise Exception("WARNING not a good ELEMENT")

    return (name, des, basis_data)
|
||||
|
||||
|
||||
# One symmetry letter, whitespace, then the number of primitives.
# (Plain raw string: the "ur" prefix is a syntax error on Python 3.)
symmetry_regex = re.compile(r'^(\w)\s+\d+\b')


def l_symmetry_gamess_us(atom_basis):
    """
    Return the begin and the end of all the type of orbital
    input: atom_basis = [name, S 1, 12 0.12 12212, ...]
    output: [ [type, begin, end], ...]

    A shell read as "L" whose next line has four fields is reported as
    "SP".  An input without any shell header gives an empty list.
    """
    # Example
    # [[u'S', 1, 5], [u'L', 5, 9], [u'L', 9, 12], [u'D', 16, 18]]"

    l = []
    for i, line in enumerate(atom_basis):
        # Optimisation: header lines are short, skip the regex otherwise.
        if len(line) >= 10:
            continue
        m = symmetry_regex.search(line)
        if not m:
            continue

        read_symmetry = m.group(1)

        # L is either a real L or the special SP shell: decide from the
        # number of fields on the next line.  Short-circuiting (and the
        # bounds check) keeps a header on the last line from raising.
        is_sp = (read_symmetry == "L"
                 and i + 1 < len(atom_basis)
                 and len(atom_basis[i + 1].split()) == 4)
        real_symmetry = "SP" if is_sp else read_symmetry

        if l:
            # The previous shell ends where this one begins.
            l[-1].append(i)
        l.append([real_symmetry, i])

    if l:
        # The last shell runs to the end of the input.
        l[-1].append(len(atom_basis))
    return l
|
||||
|
||||
|
||||
def handle_l_gamess_us(l_atom_basis):
    """
    Read l_atom_basis and split every SP shell into an S and a P shell.

    Each entry is one atom's basis text; its first line (the atom name)
    is kept as is.  Returns the list of rewritten texts.
    """
    converted = []
    for atom_basis in l_atom_basis:
        raw_lines = atom_basis.split("\n")
        # raw_lines[0] contains the name of the atom.
        out_lines = [raw_lines[0]]

        for symmetry, begin, end in l_symmetry_gamess_us(raw_lines):
            if symmetry != "SP":
                out_lines += raw_lines[begin:end]
                continue

            header = raw_lines[begin]
            s_rows = []
            p_rows = []
            for row in raw_lines[begin + 1:end]:
                # One L row => one S row and one P row sharing the index
                # and the exponent.
                fields = row.split()
                prefix = "{:>3}".format(fields[0])
                prefix += "{:>15.7f}".format(float(fields[1]))
                s_rows.append(prefix + "{:>23.7f}".format(float(fields[2])))
                p_rows.append(prefix + "{:>23.7f}".format(float(fields[3])))

            out_lines.append(header.replace("L", "S"))
            out_lines.extend(s_rows)
            out_lines.append(header.replace("L", "P"))
            out_lines.extend(p_rows)

        converted.append("\n".join(out_lines))

    return converted
|
83
src/parser/gaussian94.py
Normal file
83
src/parser/gaussian94.py
Normal file
@ -0,0 +1,83 @@
|
||||
# __ _
|
||||
# /__ _. _ _ o _. ._ (_| |_|_
|
||||
# \_| (_| |_| _> _> | (_| | | | |
|
||||
#
|
||||
import sys
|
||||
|
||||
|
||||
def parse_basis_data_gaussian94(data, name, description, elements, debug=True):
    """Parse the Gaussian94 basis data raw html to get a nice tuple.

    The data-pairs item is actually expected to be a 2 item list:
    [symbol, data]

    e.g. ["Ca", "#BASIS SET..."]

    N.B.: Currently ignores ECP data!

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: print the raw data / warnings when something is off
    @type debug : bool
    @return: (name, description, data-pairs)
    @rtype : tuple
    @raise ValueError: if `data` contains no "****" marker
    """

    # Each basis set block starts and ends with ****. Find the region
    # containing all the basis blocks using the first and last ****.
    mark = "****"
    begin = data.find(mark)
    end = data.rfind(mark)

    if begin == -1 or end == -1:
        if debug:
            print(data)
        str_ = " No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    # `end` is already the index where the last marker starts, so slicing
    # up to it keeps the whole final block (subtracting len(mark) again
    # would cut off its last characters).
    trimmed = data[begin + len(mark):end].strip()
    chunks = []
    lines = []

    # group lines of data delimited by mark into per-element chunks
    for line in trimmed.split("\n"):
        if line.startswith(mark):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # handle trailing chunk that is not followed by another basis set block
    if lines and (not chunks or lines != chunks[-1]):
        chunks.append(lines)

    # join lines back into solid text blocks, dropping the marker lines
    chunks = ["\n".join([L for L in c if mark not in L]) for c in chunks]

    # check each block for element and assign symbols to final pairs
    pairs = []
    unused_elements = set([e.upper() for e in elements])
    for chunk in chunks:
        # get first 3 chars of first line in block
        symbol = chunk.split("\n")[0][:3].strip()
        try:
            unused_elements.remove(symbol.upper())
        except KeyError:
            # Not requested, or seen before: warn but keep the block.
            if debug:
                msg = "Warning: already processed {0}\n".format(symbol)
                sys.stderr.write(msg)
        pairs.append([symbol, chunk])

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}".format(
            list(unused_elements),
            name)
        print(msg)

    return (name, description, pairs)
|
228
src/parser/nwchem.py
Normal file
228
src/parser/nwchem.py
Normal file
@ -0,0 +1,228 @@
|
||||
# _
|
||||
# |\ | / |_ _ ._ _
|
||||
# | \| \/\/ \_ | | (/_ | | |
|
||||
#
|
||||
import json
|
||||
|
||||
|
||||
def extract_basis_nwchem(data, name):
    """Extract atomic orbital, charge density fitting, or exchange
    correlation functional basis data from a text region passed in as
    data. The charge density fitting and exchange correlation functional
    basis set data are employed for density functional calculations.

    The region of interest starts right after the ``BASIS "<name>" PRINT``
    marker and stops at the next ``END`` marker (both matched without
    regard to case). Inside that region every line starting with
    ``#BASIS SET`` opens a new chunk.

    @param data: text region containing basis set data
    @type data : str
    @param name: name of basis type: "ao basis", "cd basis", or "xc basis"
    @type name : str
    @return: per-element basis set chunks; empty list when the begin
             marker is absent or the region holds no text
    @rtype : list
    """

    begin_marker = """BASIS "{0}" PRINT""".format(name)
    end_marker = "END"

    # The original data has inconsistent capitalization, so the markers are
    # located in an upper-cased copy (built once) while the slices are taken
    # from the untouched text.
    # NOTE(review): assumes upper-casing preserves string length, which
    # holds for ASCII input -- confirm if non-ASCII data is possible.
    upper_data = data.upper()
    begin = upper_data.find(begin_marker.upper())

    # No basis data found
    if begin == -1:
        return []

    start = begin + len(begin_marker)
    end = upper_data.find(end_marker, start)
    if end == -1:
        # Unterminated block: keep everything after the begin marker
        # instead of letting a -1 index silently chop the tail.
        end = len(data)

    # "end" is the index of the END marker itself, so slicing up to it
    # already excludes the marker; subtracting the marker length would cut
    # off the last characters of real data.
    trimmed = data[start:end].strip()
    if not trimmed:
        return []

    chunks = []
    lines = []

    # group lines of data delimited by #BASIS SET... into per-element chunks
    for line in trimmed.split("\n"):
        if line.upper().startswith("#BASIS SET"):
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # flush the trailing chunk that is not followed by another #BASIS SET...
    if lines:
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
|
||||
|
||||
|
||||
def extract_ecp_nwchem(data):
    """Extract the effective core potential basis data from a text region
    passed in as data.

    The ECP region starts right after an ``ECP`` line and stops at the next
    ``END`` marker (both matched without regard to case). Inside that
    region every line containing `` nelec `` opens a new chunk, e.g.
    "Zn nelec 18" begins a zinc ECP.

    @param data: text region containing ECP data
    @type data : str
    @return: per-element effective core potential chunks; empty list when
             either marker is missing or the region holds no text
    @rtype : list
    """

    ecp_begin_mark = "ECP\n"
    ecp_end_mark = "END"

    # Locate the markers in an upper-cased copy (built once) and slice the
    # untouched text.
    # NOTE(review): assumes upper-casing preserves string length, which
    # holds for ASCII input -- confirm if non-ASCII data is possible.
    upper_data = data.upper()
    ecp_begin = upper_data.find(ecp_begin_mark)
    if ecp_begin == -1:
        # No ECP data, so return empty list
        return []

    start = ecp_begin + len(ecp_begin_mark)
    ecp_end = upper_data.find(ecp_end_mark, start)
    if ecp_end == -1:
        # Unterminated ECP section is treated as "no ECP data"
        return []

    # "ecp_end" is the index of the END marker itself, so slicing up to it
    # already excludes the marker; subtracting the marker length would cut
    # off the last characters of real data.
    ecp_region = data[start:ecp_end].strip()
    if not ecp_region:
        return []

    chunks = []
    lines = []

    # group lines of data delimited by XX nelec YY into chunks
    for line in ecp_region.split("\n"):
        if " nelec " in line.lower():
            if lines:
                chunks.append(lines)
            lines = [line]
        else:
            lines.append(line)

    # flush the trailing chunk that is not followed by another XX nelec YY
    if lines:
        chunks.append(lines)

    # join lines back into solid text blocks
    return ["\n".join(c) for c in chunks]
|
||||
|
||||
|
||||
def unpack_nwchem_basis_block(data):
    """Deserialize one JSON-encoded NWChem basis data block.

    @param data: JSON text holding basis set data, possibly of several
                 basis types at once
    @type data : str
    @return: the decoded content of the block
    @rtype : dict
    """
    return json.loads(data)
|
||||
|
||||
|
||||
def parse_basis_data_nwchem(data, name, description, elements, debug=True):
    """Parse the NWChem basis data raw html to get a nice list.

    The data-pairs item is actually expected to be a 2 item list:
    [symbol, data]

    e.g. ["Ca", "#BASIS SET..."]

    where data is a JSON object mapping each basis type found for that
    element ("ao basis", "cd basis", "xc basis", "ecp") to its text chunk.
    Pairs are ordered by first appearance of the element symbol.

    @param data: raw HTML from BSE
    @type data : unicode
    @param name: basis set name
    @type name : str
    @param description: basis set description
    @type description : str
    @param elements: element symbols e.g. ['H', 'C', 'N', 'O', 'Cl']
    @type elements : list
    @param debug: currently not used by this parser
    @type debug : bool
    @return: [name, description, data-pairs]
    @rtype : list
    @raise ValueError: if no basis data of any type is found, or if a
                       chunk contains no recognizable element symbol
    """

    def extract_symbol(txt):
        """Return the symbol taken from the first three characters of the
        first line of txt that is neither a "#" comment nor blank there."""
        for sline in txt.split("\n"):
            if sline.startswith("#"):
                continue
            fields = sline[:3].split()
            if fields:
                return fields[0]

        raise ValueError("Can't find element symbol in {0}".format(txt))

    # Basis types in the order their data is merged into each element entry
    groups = [
        ("ao basis", extract_basis_nwchem(data, "ao basis")),
        ("cd basis", extract_basis_nwchem(data, "cd basis")),
        ("xc basis", extract_basis_nwchem(data, "xc basis")),
        ("ecp", extract_ecp_nwchem(data)),
    ]

    if not any(cgroup for _, cgroup in groups):
        str_ = "No basis set data found while attempting to process {0} ({1})"
        raise ValueError(str_.format(name, description))

    unused_elements = set(e.upper() for e in elements)
    packed = {}   # symbol -> {basis type: chunk}
    order = []    # symbols in order of first appearance

    for gname, cgroup in groups:
        for chunk in cgroup:
            symbol = extract_symbol(chunk)

            # Tag the element as used, whether it comes from an ordinary
            # basis section or from the ECP section; an element may
            # legitimately show up in several sections.
            unused_elements.discard(symbol.upper())

            # Create a fresh entry (e.g. Na with initial ao basis) or
            # expand an existing one (e.g. add ecp data for Na).
            if symbol not in packed:
                packed[symbol] = {}
                order.append(symbol)
            packed[symbol][gname] = chunk

    if unused_elements:
        msg = "Warning: elements {0} left over for {1}"
        # print() with a single argument behaves the same on Python 2 and 3
        print(msg.format(sorted(unused_elements), name))

    # Assign (Symbol, Serialized) to final pairs
    pairs = [[symbol, json.dumps(packed[symbol])] for symbol in order]
    return [name, description, pairs]
|
Loading…
Reference in New Issue
Block a user