10
0
mirror of https://github.com/LCPQ/QUESTDB_website.git synced 2025-01-12 05:58:23 +01:00

Create datafileBuilder and other Python libs to manipulate datafiles

This commit is contained in:
Mickaël Véril 2019-11-12 14:36:23 +01:00
parent 5c76b08c2d
commit 10ca75a0f0
8 changed files with 417 additions and 2 deletions

1
.gitignore vendored
View File

@ -5,3 +5,4 @@ public
*.swp *.swp
*.out *.out
public public
__pycache__

43
docs/example.tex Normal file
View File

@ -0,0 +1,43 @@
\newcommand{\TDDFT}{TD-DFT}
\newcommand{\CASSCF}{CASSCF}
\newcommand{\CASPT}{CASPT2}
\newcommand{\ADC}[1]{ADC(#1)}
\newcommand{\CC}[1]{CC#1}
\newcommand{\CCSD}{CCSD}
\newcommand{\EOMCCSD}{EOM-CCSD}
\newcommand{\CCSDT}{CCSDT}
\newcommand{\CCSDTQ}{CCSDTQ}
\newcommand{\CCSDTQP}{CCSDTQP}
\newcommand{\CI}{CI}
\newcommand{\sCI}{sCI}
\newcommand{\exCI}{exFCI}
\newcommand{\FCI}{FCI}
% basis
\newcommand{\AVDZ}{\emph{aug}-cc-pVDZ}
\newcommand{\AVTZ}{\emph{aug}-cc-pVTZ}
\newcommand{\DAVTZ}{d-\emph{aug}-cc-pVTZ}
\newcommand{\AVQZ}{\emph{aug}-cc-pVQZ}
\newcommand{\DAVQZ}{d-\emph{aug}-cc-pVQZ}
\newcommand{\TAVQZ}{t-\emph{aug}-cc-pVQZ}
\newcommand{\AVPZ}{\emph{aug}-cc-pV5Z}
\newcommand{\DAVPZ}{d-\emph{aug}-cc-pV5Z}
\newcommand{\PopleDZ}{6-31+G(d)}
\begin{tabular}{l|p{.6cm}p{1.1cm}p{1.4cm}p{1.7cm}p{.9cm}|p{.6cm}p{1.1cm}p{1.4cm}p{.9cm}|p{.6cm}p{1.1cm}p{.9cm}|p{.7cm}p{.7cm}p{.7cm}}
\multicolumn{16}{c}{Dinitrogen}\\
& \multicolumn{5}{c}{\AVDZ} & \multicolumn{4}{c}{\AVTZ}& \multicolumn{3}{c}{\AVQZ} & \multicolumn{3}{c}{Litt.}\\
State & {\CC{3}} & {\CCSDT} & {\CCSDTQ} & {\CCSDTQP} & {\exCI} & {\CC{3}} & {\CCSDT} & {\CCSDTQ} & {\exCI}& {\CC{3}} & {\CCSDT} & {\exCI} & Exp.$^a$ & Exp.$^b$ & Th.$^c$\\
$^1\Pi_g (n \rightarrow \pis)$ &9.44 &9.41 & 9.41 &9.41 & 9.41 &9.34 &9.33 &9.32 &9.34 &9.33 &9.31 &9.34 &9.31 &9.31 &9.27 \\
$^1\Sigma_u^- (\pi \rightarrow \pis)$ &10.06 &10.06& 10.06&10.05& 10.05 &9.88 &9.89 &9.88 &9.88 &9.87 &9.88 &9.92 &9.92 &9.92 &10.09 \\
$^1\Delta_u (\pi \rightarrow \pis)$ &10.43 &10.44& 10.43&10.43& 10.43 &10.29&10.30 & &10.29 &10.27 &10.28 &10.31 &10.27 &10.27 &10.54 \\
$^1\Sigma_g^+ (\Ryd)$ &13.23 &13.20& 13.18&13.18& 13.18 &13.01&13.00 &12.97 &12.98 &12.90 &12.89 &12.89 & &12.2 &12.20 \\
$^1\Pi_u (\Ryd)$ &13.28 &13.17& 13.13&13.13& 13.12 &13.22&13.14 &13.09 &13.03 &13.17 & &13.1$^d$&12.78 &12.90 &12.84 \\
$^1\Sigma_u^+ (\Ryd)$ &13.14 &13.13& 13.11&13.11& 13.11 &13.12&13.12 &13.09 &13.09 &13.09 &13.09 &13.2$^d$&12.96 &12.98 &12.82 \\
$^1\Pi_u (\Ryd)$ &13.64 &13.59& 13.56&13.56& 13.56 &13.49&13.45 &13.42 &13.46 &13.42 &13.37 &13.7$^d$&13.10 &13.24 &13.61 \\
$^3\Sigma_u^+ (\pi \rightarrow \pis)$ &7.67 &7.68& 7.69 &7.70 & 7.70 &7.68 &7.69 &7.70 &7.70 &7.71 &7.71 &7.74 &7.75 &7.75 &7.56 \\
$^3\Pi_g (n \rightarrow \pis)$ &8.07 &8.06& 8.05 &8.05 & 8.05 &8.04 &8.03 &8.02 &8.01 &8.04 &8.04 &8.03 &8.04 &8.04 &8.05 \\
$^3\Delta_u (\pi \rightarrow \pis)$ &8.97 &8.96& 8.96 &8.96 & 8.96 &8.87 &8.87 &8.87 &8.87 &8.87 &8.87 &8.88 &8.88 &8.88 &8.93 \\
$^3\Sigma_u^- (\pi \rightarrow \pis)$ &9.78 &9.76& 9.75 &9.75& 9.75 &9.68 &9.68 &9.66 &9.66 &9.68 & &9.66 &9.67 &9.67 &9.86 \\
\end{tabular}

View File

@ -1,9 +1,9 @@
# Molecule : Water # Molecule : Water
# Comment : Delta ZPE energies of the water molecule # Comment : Delta ZPE energies of the water molecule
# code : Gaussian16 # code : Gaussian16
# method : B3LYP,6-31+G*
# GS : CC3,aug-cc-pVTZ # GS : CC3,aug-cc-pVTZ
# ES : CC2,cc-pVDZ # ES : CC2,cc-pVDZ
# method : B3LYP,6-31+G*
# DOI : 10.1021/acs.jctc.8b00406 # DOI : 10.1021/acs.jctc.8b00406
# Initial state Final state Energies (eV) # Initial state Final state Energies (eV)

33
tools/datafileBuilder.py Normal file
View File

@ -0,0 +1,33 @@
#!/usr/bin/env python
import sys
import re
import numpy as np
from pathlib import Path
from lib import LaTeX
from TexSoup import TexSoup
from lib.data import AbsDataFile,ZPEDataFile,FluoDataFile,dataType
import argparse
# Command-line script: parse a LaTeX file containing \newcommand definitions
# and one tabular environment, then write one data file per table column.

# When True, files are written to a "test" sub-directory of the data folder
# and the parsed command-line arguments are echoed.
DEBUG=True

parser = argparse.ArgumentParser()
# Both options are mandatory: without them the script used to crash later on
# a None value (args.file.readlines() / dataType[None]).
parser.add_argument('--file', type=argparse.FileType('r'), required=True)
parser.add_argument('--type', type=str, choices=[t.name for t in dataType], required=True)
args = parser.parse_args()
if DEBUG:
    print(args)

# Close the input file as soon as its content has been read.
with args.file as texfile:
    lines=texfile.readlines()
soup=TexSoup(lines)

# Macro definitions found in the document; they are expanded inside the cells.
commands=[LaTeX.newCommand(cmd) for cmd in soup.find_all("newcommand")]
dat=LaTeX.tabularToData(soup.tabular,commands)

# Output directory: <parent of the script's directory>/static/data[/test]
scriptpath=Path(sys.argv[0]).resolve()
datapath=scriptpath.parents[1]/"static"/"data"
if DEBUG:
    datapath=datapath/"test"
# parents/exist_ok: create missing intermediate folders and tolerate a
# directory that already exists.
datapath.mkdir(parents=True, exist_ok=True)

# Map each data type to the class able to build the matching data file.
switcher={
    dataType.ABS: AbsDataFile,
    dataType.FLUO: FluoDataFile,
    dataType.ZPE: ZPEDataFile
}
# Direct indexing: an unmapped type raises KeyError here instead of yielding
# None and failing later with an unrelated AttributeError.
filecls=switcher[dataType[args.type]]

# NOTE(review): iteration starts at column 3, although readFromTable takes the
# state labels from column 0 — columns 1 and 2 are therefore never exported.
# This may be a confusion with the 3 header *rows*; confirm before changing.
for col in range(3,np.size(dat,1)):
    filecls.readFromTable(dat,col).toFile(datapath)

130
tools/lib/LaTeX.py Normal file
View File

@ -0,0 +1,130 @@
import re
from TexSoup import TexSoup,TexCmd,TexNode
from .utils import *
import itertools
from enum import Enum
from abc import ABCMeta
class commandBase(metaclass=ABCMeta):
    """Base wrapper around a parsed LaTeX command node.

    The wrapped node is stored in ``self.source``; ``str()`` and ``repr()``
    of the wrapper are delegated to it.
    """

    def __init__(self,source,supportedTexType):
        """Wrap *source* after checking the kind of command it holds.

        Args:
            source: node exposing a ``name`` attribute.
            supportedTexType: the command name ``source.name`` must equal.

        Raises:
            ValueError: if ``source.name`` differs from *supportedTexType*.
        """
        if source.name != supportedTexType:
            # Format instead of "+": source is a node, not a str, so the
            # concatenation raised TypeError before ValueError could be built.
            raise ValueError("{} is not a {}".format(source, supportedTexType))
        self.source=source

    def __str__(self):
        # Must return a string; returning the bound method made str() fail.
        return str(self.source)

    def __repr__(self):
        return repr(self.source)
class newCommand(commandBase):
    """Wrapper around a ``\\newcommand`` node able to expand its macro.

    The macro name, its number of arguments and its body are read from the
    arguments of the wrapped node.
    """

    def __init__(self,source):
        """Wrap *source*; raises ValueError if it is not a ``newcommand``."""
        super(newCommand, self).__init__(source,"newcommand")

    @property
    def commandName(self):
        """Name of the defined macro, without its first character."""
        return self.source.args[0].value[1:]

    @property
    def argNum(self):
        """Number of macro arguments as an int (0 when none is declared)."""
        arg=self.source.args[1]
        # Always return an int: the optional argument value is text, which
        # made the "== 0" comparisons rely on mixed str/int types.
        return int(arg.value) if arg.type=='optional' else 0

    @property
    def result(self):
        """Macro body (contents of the last argument) as a fresh node."""
        args=self.source.args
        return nodify(list(args[len(args)-1].contents))

    def exist(self,tex):
        """Return True if *tex* contains at least one use of the macro."""
        return tex.find(self.commandName) is not None

    def _expand(self,tex,cmds):
        """Replace each node of *cmds* inside *tex* by the macro body."""
        argnum=self.argNum
        for cmd in cmds:
            if argnum==0:
                # A new node per occurrence: self.result builds a fresh one.
                res=self.result
            else:
                # Substitute #1..#n by the values of the call arguments.
                pattern=r'#([1-{}])'.format(argnum)
                res=TexSoup(re.sub(pattern,lambda m: cmd.args[int(m.group(1))-1].value,str(self.result)))
            soup=TexSoup(res)
            tex.replace(cmd,soup)

    def run(self,tex):
        """Expand every use of the macro in *tex*.

        Raises:
            ValueError: if the macro is not used in *tex*.
        """
        cmds=list(tex.find_all(self.commandName))
        if not cmds:
            raise ValueError("Command not found in tex")
        self._expand(tex,cmds)

    def tryrun(self,tex):
        """Expand every use of the macro in *tex*; do nothing if unused."""
        self._expand(tex,list(tex.find_all(self.commandName)))

    @staticmethod
    def runAll(tex,collection):
        """Expand the macros of *collection* in *tex* until none is left.

        Expansion is repeated because a macro body may itself use other
        macros. NOTE(review): a macro whose body uses itself would loop
        forever here (the previous recursive form had the same issue).
        """
        while True:
            pending=[cmd for cmd in collection if cmd.exist(tex)]
            if not pending:
                return
            for cmd in pending:
                cmd.run(tex)
class columnAlignment(Enum):
    """Column alignment, keyed by its one-letter specifier."""

    Left = 'l'
    Right = 'r'
    Center = 'c'
class multiColumn(commandBase):
    """Wrapper around a ``\\multicolumn`` node.

    The three arguments of the wrapped node are exposed as ``cols``,
    ``align`` and ``contents``.
    """

    def __init__(self,source):
        """Wrap *source*, which must be named ``multicolumn``."""
        super(multiColumn,self).__init__(source,"multicolumn")

    @property
    def cols(self):
        """First argument converted to an int."""
        span=self.source.args[0]
        return int(span.value)

    @property
    def align(self):
        """Second argument converted to a ``columnAlignment`` member."""
        spec=self.source.args[1]
        return columnAlignment(spec.value)

    @property
    def contents(self):
        """Contents of the third argument wrapped in a new node."""
        cell=self.source.args[2]
        return nodify(list(cell.contents))
def tabularToData(table,commands=None):
    """Convert a ``tabular`` node into a 2-D numpy array of nodes.

    The first and last lines of the environment text are dropped, rows are
    split on ``\\\\`` and cells on ``&``. A cell holding only a
    ``\\multicolumn`` is repeated ``cols`` times (the same node object is
    used for each repetition). When *commands* is given, the macros it
    contains are expanded in every cell.

    Args:
        table: node whose ``name`` is ``"tabular"``.
        commands: optional collection of ``newCommand`` objects.

    Returns:
        numpy array built from the list of rows.

    Raises:
        ValueError: if *table* is not a tabular, or if the rows do not all
            have the same number of cells.
    """
    if table.name!="tabular":
        raise ValueError("Only tabular LaTeX environment is supported")
    # Drop the \begin{tabular}... and \end{tabular} lines.
    body=str(table).split("\n")[1:-1]
    rows=[x.strip() for x in ''.join(body).split(r'\\') if x.strip()!='']
    ltable=[[c.strip() for c in r.split("&")] for r in rows]
    lnewtable=[]
    for row in ltable:
        r=[]
        for item in row:
            # Each cell is parsed once; the parsed node is reused below.
            texitem=TexSoup(item)
            child=list(texitem.children)
            if len(child)==1 and child[0].name=="multicolumn":
                mcol=multiColumn(child[0])
                el=mcol.contents
                if commands is not None:
                    newCommand.runAll(el,commands)
                r.extend([el]*mcol.cols)
            else:
                el=desarg(texitem)
                if commands is not None:
                    newCommand.runAll(el,commands)
                r.append(el)
        lnewtable.append(r)
    # Check if all rows have the same dimension
    if len({len(x) for x in lnewtable})!=1:
        raise ValueError("This tabular is not supported")
    import numpy as np
    return np.array(lnewtable,TexNode)

0
tools/lib/__init__.py Normal file
View File

185
tools/lib/data.py Normal file
View File

@ -0,0 +1,185 @@
from collections import OrderedDict
from enum import IntEnum,auto,unique
import re
class state:
    """Plain record holding a number, a multiplicity and a symmetry label."""

    def __init__(self,number, multiplicity, symetry):
        # Attribute names (including the "symetry" spelling) are read by
        # other code and must stay as they are.
        self.number, self.multiplicity, self.symetry = number, multiplicity, symetry
@unique
class dataType(IntEnum):
    """Kinds of data file; values are consecutive integers starting at 1."""

    ABS = 1
    FLUO = 2
    ZPE = 3
class dataFileBase(object):
    """Base class for a data file: metadata plus a list of excitations.

    Instances can be built from one column of a table (``readFromTable``)
    and written to disk (``toFile``).
    """

    def __init__(self):
        self.molecule = ''
        self.comment = ''
        self.code = None        # object with toDataString(), or None
        self.method = None      # object with toDataString(), or None
        self.excitations = []   # filled by readFromTable
        self.DOI = ''

    @staticmethod
    def GetFileType():
        """Return the data type of the file; this base version returns None."""
        pass

    @staticmethod
    def convertState(StateTablelist,firstState=state(1,1,"A_1")):
        """Convert LaTeX state labels into ``state`` objects.

        Each label must hold a ``$...$`` part whose first element starts
        with ``^<digit><symbol>``. The number given to a state is how many
        times its (multiplicity, symbol) pair has been seen so far,
        *firstState* being counted as already seen.

        Args:
            StateTablelist: iterable of nodes exposing ``find``.
            firstState: the initial state.

        Returns:
            list of ``state`` objects, in the order of the input.

        Raises:
            ValueError: if a label holds no ``$...$`` part or does not match
                the expected pattern.
        """
        parsed=[]
        for TexState in StateTablelist:
            math=TexState.find("$")
            if math is None:
                raise ValueError("No math expression in state label: {}".format(TexState))
            st=list(math.contents)[0]
            m=re.match(r"^\^(?P<multiplicity>\d)(?P<GPS>\S+)",st)
            if m is None:
                # Previously an opaque AttributeError on None.
                raise ValueError("Unable to parse state label: {}".format(st))
            parsed.append(m.groups())
        # Running occurrence count, seeded with the initial state, instead of
        # re-counting a growing list slice for every item.
        seen={(str(firstState.multiplicity),firstState.symetry): 1}
        lst=[]
        for item in parsed:
            seen[item]=seen.get(item,0)+1
            lst.append(state(seen[item],int(item[0]),item[1]))
        return lst

    @classmethod
    def readFromTable(cls, table,column,firstState=state(1,1,"A_1")):
        """Build a data file from one column of *table*.

        Row 0 of the column gives the molecule, row 1 the basis and row 2
        the method name; the following rows give the values. State labels
        are read from column 0, starting at row 3. Empty cells are skipped.

        Args:
            table: 2-D array indexable as ``table[rows, column]``.
            column: index of the column to read.
            firstState: initial state used for every excitation.

        Returns:
            a new instance of *cls*.
        """
        data=cls()
        col=table[:,column]
        data.molecule=str(col[0])
        data.method=method(str(col[2]),str(col[1]))
        finsts=cls.convertState(table[3:,0],firstState)
        for final,cell in zip(finsts,col[3:]):
            if str(cell)!="":
                # Only the first element of the cell is used as the value.
                val=float(str(list(cell.contents)[0]))
                data.excitations.append(excitationValue(firstState,final,val))
        return data

    def getMetadata(self):
        """Return the metadata as an ordered mapping of header key to text."""
        dic=OrderedDict()
        dic["Molecule"]=self.molecule
        dic["Comment"]=self.comment
        dic["Code"]="" if self.code is None else self.code.toDataString()
        dic["Method"]="" if self.method is None else self.method.toDataString()
        dic["DOI"]="" if self.DOI is None else self.DOI
        return dic

    def toFile(self,datadir):
        """Write the data file below *datadir*.

        The file goes to ``<datadir>/<type name in lower case>/`` and is
        named after the molecule, the method name and the basis. An already
        existing file is left untouched.

        Args:
            datadir: path object of the output directory.
        """
        subpath=datadir/self.GetFileType().name.lower()
        # Create missing parent folders too and tolerate an existing one.
        subpath.mkdir(parents=True,exist_ok=True)
        datafile=subpath/"{}_{}_{}.dat".format(self.molecule.lower(),self.method.name,self.method.basis)
        if datafile.exists():
            return
        header=(
            "\n"
            "# Initial state Final state Energies (eV)\n"
            "####################### ####################### ###############\n"
            "# Spin Number Symm Spin Number Symm E_{}\n"
        ).format(self.GetFileType().name.lower())
        with datafile.open("w") as f:
            for key,value in self.getMetadata().items():
                if value is not None:
                    f.write("# {}: {}\n".format(key,value))
            f.write(header)
            # NOTE(review): the header announces "Spin Number" but each row
            # writes number first, then multiplicity — confirm which order
            # the reader of these files expects.
            for ex in self.excitations:
                mystr=" {:6s}{:9s}{:10s}{:6s}{:8s}{:13s}{}\n".format(str(ex.initial.number),str(ex.initial.multiplicity),ex.initial.symetry,str(ex.final.number),str(ex.final.multiplicity),ex.final.symetry,str(ex.value))
                f.write(mystr)
class method:
    """A method name with an optional basis."""

    def __init__(self,name, basis):
        self.name = name
        self.basis = basis

    @staticmethod
    def fromString(string):
        """Build a method from ``"name,basis"`` or ``"name"``.

        The basis is set only when the string holds exactly two
        comma-separated parts; otherwise it is None.
        """
        vals = string.split(",")
        # len(): Python lists have no ".length" attribute, so this method
        # used to raise AttributeError on every call.
        if len(vals) == 2:
            return method(vals[0], vals[1])
        return method(vals[0], None)

    def __str__(self):
        """Return ``name/basis``, or just the name when there is no basis."""
        string = self.name
        if self.basis:
            string += '/' + self.basis
        return string

    def toDataString(self):
        """Return ``name,basis``, or just the name when there is no basis."""
        string = self.name
        if self.basis:
            string += "," + self.basis
        return string
class code:
    """A program name with an optional version."""

    def __init__(self,name, version):
        self.name, self.version = name, version

    def toDataString(self):
        """Return ``name,version``, or the name alone without a version."""
        if not self.version:
            return self.name
        return self.name + "," + self.version
class oneStateDataFileBase(dataFileBase):
    """Data file carrying one extra metadata entry: a geometry."""

    def __init__(self):
        super().__init__()
        self.geometry = None

    @classmethod
    def readFromTable(cls, table,column,firstState=state(1,1,"A_1")):
        """Delegate to the parent implementation without any change."""
        return super().readFromTable(table,column,firstState=firstState)

    def getMetadata(self):
        """Parent metadata plus "Geometry", with "DOI" kept as last entry."""
        metadata=super().getMetadata()
        if self.geometry is None:
            metadata["Geometry"]=""
        else:
            metadata["Geometry"]=self.geometry.toDataString()
        metadata.move_to_end("DOI")
        return metadata
class AbsDataFile(oneStateDataFileBase):
    """One-state data file whose type is ``dataType.ABS``."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def GetFileType():
        """Return ``dataType.ABS``."""
        return dataType.ABS
class FluoDataFile(oneStateDataFileBase):
    """One-state data file whose type is ``dataType.FLUO``."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def GetFileType():
        """Return ``dataType.FLUO``."""
        return dataType.FLUO
class twoStateDataFileBase(dataFileBase):
    """Data file carrying two extra metadata entries: ``GS`` and ``ES``."""

    def __init__(self):
        super().__init__()
        self.GS, self.ES = None, None

    def getMetadata(self):
        """Parent metadata plus "GS" and "ES", with "DOI" kept last."""
        metadata=super().getMetadata()
        # Same order as before: GS first, then ES.
        for key,entry in (("GS",self.GS),("ES",self.ES)):
            metadata[key]="" if entry is None else entry.toDataString()
        metadata.move_to_end("DOI")
        return metadata

    @classmethod
    def readFromTable(cls, table,column,firstState=state(1,1,"A_1")):
        """Delegate to the parent implementation without any change."""
        return super().readFromTable(table,column,firstState=firstState)
class ZPEDataFile(twoStateDataFileBase):
    """Two-state data file whose type is ``dataType.ZPE``."""

    def __init__(self):
        super().__init__()

    @staticmethod
    def GetFileType():
        """Return ``dataType.ZPE``."""
        return dataType.ZPE
class excitationBase:
    """Pair made of an initial and a final state."""

    def __init__(self,initial, final):
        self.initial, self.final = initial, final
class excitationValue(excitationBase):
    """Excitation (initial and final state) together with a value."""

    def __init__(self,initial, final, value):
        super().__init__(initial, final)
        self.value = value

23
tools/lib/utils.py Normal file
View File

@ -0,0 +1,23 @@
import itertools
import sys
from TexSoup import TexEnv,TexNode, RArg
from collections.abc import Iterable
def nodify(TexArray,envName="[tex]",parent=None):
    """Wrap *TexArray* in an environment named *envName* and return its node.

    The ``parent`` attribute of the returned node is set to *parent*.
    """
    wrapped=TexNode(TexEnv(envName,TexArray))
    wrapped.parent=parent
    return wrapped
def desarg(tex):
    """Return a copy of *tex* where ``RArg`` items are replaced by their contents.

    Every item of ``tex.contents`` that is an ``RArg`` is replaced by what
    it contains (each element when the contents is a list, the contents
    itself otherwise); other items are kept as they are. The result is a
    new node with the same name and parent as *tex*.
    """
    lst=[]
    for item in tex.contents:
        if isinstance(item,RArg):
            inner=item.contents
            if isinstance(inner,list):
                lst.extend(inner)
            else:
                # Was "myit": an undefined (or stale) name in this branch.
                lst.append(inner)
        else:
            lst.append(item)
    return nodify(lst,tex.name,tex.parent)