10
0
mirror of https://github.com/LCPQ/QUESTDB_website.git synced 2024-11-05 05:33:55 +01:00
QUESTDB_website/tools/lib/data.py

348 lines
12 KiB
Python
Raw Normal View History

from collections import OrderedDict
2019-12-12 15:20:56 +01:00
from TexSoup import TexSoup
from .LaTeX import newCommand
from .utils import getValFromCell,checkFloat
from TexSoup import TexNode
2019-11-25 11:47:24 +01:00
from enum import IntEnum,auto,unique,IntFlag
2019-12-03 15:24:29 +01:00
from .Format import Format
import re
2019-12-12 15:20:25 +01:00
import numpy as np
2019-11-25 11:55:25 +01:00
class state:
    """Electronic state identified by its ordinal number, spin multiplicity and symmetry label."""

    def __init__(self, number, multiplicity, symetry):
        # Attribute names (including the historical spelling "symetry") are
        # read by other classes in this module, so they are kept as-is.
        self.number = number
        self.multiplicity = multiplicity
        self.symetry = symetry

    def __repr__(self):
        return "{}({!r}, {!r}, {!r})".format(
            type(self).__name__, self.number, self.multiplicity, self.symetry
        )
@unique
class dataType(IntEnum):
    """Kind of data file; the lower-cased member name is used as a sub-directory name by toFile."""

    ABS = auto()
    FLUO = auto()
class dataFileBase(object):
    """Base class for a data file: metadata plus a list of excitation values.

    The static readers (`convertState`, `readFromTable`) build concrete
    `AbsDataFile` / `FluoDataFile` instances from a LaTeX table, and
    `toFile` serialises one instance to a ``.dat`` file.
    """

    def __init__(self):
        self.molecule = ''
        self.comment = ''
        self.code = None
        self.method = None
        self.excitations = []
        self.DOI = ''

    @property
    def IsTBE(self):
        """True when the attached method is named exactly ``"TBE"``."""
        return self.method.name == "TBE"

    @staticmethod
    def GetFileType():
        """Return the `dataType` of this file; the base implementation returns None."""
        pass

    @staticmethod
    def _parseLatexOrExit(source):
        """Parse `source` with TexSoup; on failure print the error line and exit with status -1."""
        try:
            return TexSoup(source)
        except Exception as err:
            # `except Exception` (instead of a bare except) lets
            # KeyboardInterrupt / SystemExit propagate untouched.
            print(f"Error when parsing latex state: {source!s}")
            # Same effect as the site-provided exit(-1), without relying on `site`.
            raise SystemExit(-1) from err

    @staticmethod
    def convertState(StateTablelist, default=dataType.ABS, firstState=state(1, 1, "A_1"), commands=None):
        """Convert LaTeX state cells into ``(state, dataType, type)`` tuples.

        Each cell must contain a ``$...$`` math node whose text, after the
        user `commands` have been applied, looks like
        ``^<multiplicity><symmetry> [<special>] (<type>)``.

        Parameters:
            StateTablelist: iterable of TexSoup nodes, one per state cell.
            default: `dataType` used unless the special marker is ``F``
                (which selects `dataType.FLUO`).
            firstState: initial state; it counts as the first occurrence of
                its (multiplicity, symmetry) pair when numbering states.
            commands: LaTeX command definitions forwarded to
                `newCommand.runAll` (defaults to an empty list).

        Returns:
            list of ``(state, dataType, type-or-None)`` in input order, where
            ``state.number`` is the running count of that
            (multiplicity, symmetry) pair, `firstState` included.

        Raises:
            ValueError: when a state string does not match the expected form.
            SystemExit: when TexSoup cannot parse a state.
        """
        commands = [] if commands is None else commands
        # Running occurrence counter per (multiplicity, symmetry); seeded with
        # the initial state so that an identical final state is numbered 2.
        seen = {(firstState.multiplicity, firstState.symetry): 1}
        result = []
        for TexState in StateTablelist:
            math = TexState.find("$")
            mystr = str(list(math.contents)[0])
            mathsoup = dataFileBase._parseLatexOrExit(mystr)
            newCommand.runAll(mathsoup, commands)
            st = str(mathsoup)
            # NOTE(review): "(:?" is probably a typo for "(?:" and the "}" after
            # <special> is mandatory even without "\mathrm{"; the pattern is
            # kept byte-identical here -- confirm before changing it.
            m = re.match(r"^\^(?P<multiplicity>\d)(?P<symm>[^\s\[(]*)\s*(?:\[(?:\\mathrm{)?(?P<special>\w)(?:})\])?\s*(:?\((?P<type>[^\)]*)\))?", st)
            if m is None:
                raise ValueError(f"Unrecognised state description: {st}")
            mul = int(m.group("multiplicity"))
            symm = m.group("symm")
            trsp = dataType.FLUO if m.group("special") == "F" else default
            key = (mul, symm)
            seen[key] = seen.get(key, 0) + 1
            result.append((state(seen[key], mul, symm), trsp, m.group("type")))
        return result

    @staticmethod
    def readFromTable(table, format=Format.LINE, default=dataType.ABS, firstState=state(1, 1, "A_1"), commands=None):
        """Build data files from a 2-D table of LaTeX cells.

        Layouts, as read by the code below:

        * ``Format.LINE``: column 0 holds the states (from row 3 on); every
          other column is one data set with molecule in row 0, basis in
          row 1, method name in row 2 and values from row 3 on.
        * ``Format.COLUMN``: column 0 starts a new molecule whenever it is
          non-empty (from row 2 on), column 1 holds the states; every
          column from 2 on has basis in row 0 and method name in row 1.
        * ``Format.DOUBLECOLUMN``: column 0 = molecule, column 1 = two-row
          transition description (transition, then type), column 2 = method
          name (optionally suffixed by ``($\\%T_1$)``), columns 3+ = values
          with the basis in row 1.
        * ``Format.TBE``: column 0 = molecule, 1 = state, 2 = oscillator
          force, 3 = %T1, 4 = value stored under method ``TBE(FC)``,
          7 = value stored under method ``TBE``.

        Parameters:
            table: 2-D array indexed as ``table[row, column]``
                (``np.size(table, axis)`` is used for its dimensions).
            format: one of the `Format` members above.
            default, firstState, commands: forwarded to `convertState`.

        Returns:
            list of data-file objects, or None for an unhandled `format`.

        Raises:
            ValueError: when a DOUBLECOLUMN value or transition cannot be parsed.
            SystemExit: when TexSoup cannot parse a state/transition.
        """
        commands = [] if commands is None else commands

        def getSubtableIndex(table, firstindex=2, column=0, count=1):
            """Return inclusive ``(first, last)`` row ranges, split where `column` is non-empty."""
            nrows = np.size(table, 0)
            subtablesindex = []
            i = firstindex + count
            while i < nrows:
                if str(table[i, column]) != "":
                    subtablesindex.append((firstindex, i - 1))
                    firstindex = i
                    i += count
                else:
                    i += 1
            # The last range must end on the last row (nrows - 1). Ending on
            # `nrows` was harmless on the full table (slices clip) but, when
            # called on a per-molecule slice, made the last transition read
            # one row of the following molecule.
            subtablesindex.append((firstindex, nrows - 1))
            return subtablesindex

        datalist = []
        switcher = {
            dataType.ABS: AbsDataFile,
            dataType.FLUO: FluoDataFile,
        }

        if format == Format.LINE:
            for colindex in range(1, np.size(table, 1)):
                col = table[:, colindex]
                mymolecule = str(col[0])
                mymethod = method(str(col[2]), str(col[1]))
                finsts = dataFileBase.convertState(table[3:, 0], default=default, firstState=firstState, commands=commands)
                datacls = {}
                for index, cell in enumerate(col[3:]):
                    if str(cell) == "":
                        continue
                    val, unsafe = getValFromCell(cell)
                    finst = finsts[index]
                    dt = finst[1]
                    if dt in datacls:
                        data = datacls[dt]
                    else:
                        data = switcher[dt]()
                        data.molecule = mymolecule
                        data.method = mymethod
                        datacls[dt] = data
                    data.excitations.append(excitationValue(firstState, finst[0], val, type=finst[2], isUnsafe=unsafe))
                datalist.extend(datacls.values())
            return datalist

        elif format == Format.COLUMN:
            for first, last in getSubtableIndex(table):
                for colindex in range(2, np.size(table, 1)):
                    datacls = {}
                    col = table[:, colindex]
                    mymolecule = str(table[first, 0])
                    mymethod = method(str(col[1]), str(col[0]))
                    finsts = dataFileBase.convertState(table[first:last + 1, 1], default=default, firstState=firstState, commands=commands)
                    for index, cell in enumerate(col[first:last + 1]):
                        if str(cell) == "":
                            continue
                        val, unsafe = getValFromCell(cell)
                        finst = finsts[index]
                        dt = finst[1]
                        if dt in datacls:
                            data = datacls[dt]
                        else:
                            data = switcher[dt]()
                            data.molecule = mymolecule
                            data.method = mymethod
                            datacls[dt] = data
                        # The unsafe flag was parsed but dropped here; pass it on
                        # like the LINE and TBE branches do.
                        data.excitations.append(excitationValue(firstState, finst[0], val, type=finst[2], isUnsafe=unsafe))
                    datalist.extend(datacls.values())
            return datalist

        elif format == Format.DOUBLECOLUMN:
            PTString = r"($\%T_1$)"
            # One data file per (molecule, method name, basis), shared across
            # all transitions, hence created once and flushed at the end.
            datacls = {}
            for firstMol, lastMol in getSubtableIndex(table):
                mymolecule = str(table[firstMol, 0])
                subtablestrans = getSubtableIndex(table[firstMol:lastMol + 1, :], firstindex=0, column=1, count=2)
                for firstTrans, lastTrans in subtablestrans:
                    rowstart = firstMol + firstTrans
                    rowstop = firstMol + lastTrans + 1
                    mytrans = table[rowstart:rowstop, :]
                    # Rows 0 and 1 of column 1: transition, then its type. The
                    # expanded LaTeX is written back into the cells.
                    mytransdesc = mytrans[0:2, 1]
                    for i in range(2):
                        mathsoup = dataFileBase._parseLatexOrExit(mytransdesc[i])
                        newCommand.runAll(mathsoup, commands)
                        mytransdesc[i] = str(mathsoup)
                    for colindex in range(3, np.size(table, 1)):
                        col = table[:, colindex]
                        mybasis = str(col[1])
                        for index, cell in enumerate(col[rowstart:rowstop]):
                            methodnameAT1 = str(table[rowstart + index, 2])
                            HasT1 = methodnameAT1.endswith(PTString)
                            if HasT1:
                                methodname = methodnameAT1[:-len(PTString)]
                            else:
                                methodname = str(methodnameAT1)
                            mymethod = method(methodname, mybasis)
                            strcell = str(cell)
                            if strcell == "":
                                continue
                            if HasT1:
                                m = re.match(r"^(?P<value>[-+]?\d+\.?\d*)\s*(?:\((?P<T1>\d+\.?\d*)\\\%\))?", strcell)
                                if m is None:
                                    raise ValueError(f"Unrecognised value cell: {strcell}")
                                val, unsafe = getValFromCell(TexSoup(m.group("value")))
                                T1 = m.group("T1")
                            else:
                                m = re.match(r"^[-+]?\d+\.?\d*", strcell)
                                if m is None:
                                    raise ValueError(f"Unrecognised value cell: {strcell}")
                                val, unsafe = getValFromCell(TexSoup(m.group(0)))
                                T1 = None
                            key = (mymolecule, mymethod.name, mymethod.basis)
                            if key in datacls:
                                data = datacls[key]
                            else:
                                data = AbsDataFile()
                                data.molecule = mymolecule
                                data.method = mymethod
                                datacls[key] = data
                            # Parsed per excitation so every excitation owns its
                            # own state objects.
                            infin = mytransdesc[0].split(r"\rightarrow")
                            for i, item in enumerate(infin):
                                m = re.match(r"^(?P<number>\d)\\[,:;\s]\s*\^(?P<multiplicity>\d)(?P<sym>\S*)", item.strip())
                                if m is None:
                                    raise ValueError(f"Unrecognised transition state: {item.strip()}")
                                infin[i] = state(m.group("number"), m.group("multiplicity"), m.group("sym"))
                            data.excitations.append(excitationValue(infin[0], infin[1], val, type=mytransdesc[1], isUnsafe=unsafe, T1=T1))
            datalist.extend(datacls.values())
            return datalist

        elif format == Format.TBE:
            for first, last in getSubtableIndex(table):
                datacls = {}
                mymolecule = str(table[first, 0])
                mymethod = (method("TBE(FC)"), method("TBE"))
                finsts = dataFileBase.convertState(table[first:last + 1, 1], default=default, firstState=firstState, commands=commands)
                for index, row in enumerate(table[first:last + 1, ]):
                    oscilatorForces = checkFloat(str(row[2]))
                    T1 = checkFloat(str(row[3]))
                    val, unsafe = getValFromCell(row[4])
                    corr, unsafecorr = getValFromCell(row[7])
                    finst = finsts[index]
                    dt = finst[1]
                    if dt in datacls:
                        datamtbe = datacls[dt]
                    else:
                        cl = switcher[dt]
                        datamtbe = []
                        for met in mymethod:
                            data = cl()
                            data.molecule = mymolecule
                            data.method = met
                            datamtbe.append(data)
                        datacls[dt] = datamtbe
                    # datamtbe[0] is the "TBE(FC)" file, datamtbe[1] the "TBE" file.
                    pairs = ((val, unsafe), (corr, unsafecorr))
                    for data, (energy, flag) in zip(datamtbe, pairs):
                        data.excitations.append(excitationValue(firstState, finst[0], energy, type=finst[2], T1=T1, forces=oscilatorForces, isUnsafe=flag))
                for value in datacls.values():
                    datalist.extend(value)
            return datalist

        # Any other format value is not handled; None is returned as before.
        return None

    def getMetadata(self):
        """Return the header fields as an OrderedDict (Molecule, Comment, code, method, DOI)."""
        dic = OrderedDict()
        dic["Molecule"] = self.molecule
        dic["Comment"] = self.comment
        dic["code"] = "" if self.code is None else self.code.toDataString()
        dic["method"] = "" if self.method is None else self.method.toDataString()
        dic["DOI"] = "" if self.DOI is None else self.DOI
        return dic

    def toFile(self, datadir):
        """Write this data file below ``datadir/<file type>/``.

        The file is named ``<molecule>_<method>[_<basis>].dat`` (molecule
        lower-cased, spaces replaced by underscores). An existing file is
        left untouched.

        Parameters:
            datadir: directory object supporting ``/``, ``exists()``,
                ``mkdir()`` and ``open()`` -- presumably a `pathlib.Path`.
        """
        typename = self.GetFileType().name.lower()
        subpath = datadir / typename
        if not subpath.exists():
            subpath.mkdir()
        molname = self.molecule.lower().replace(" ", "_")
        if self.method.basis is None:
            fileName = "{}_{}.dat".format(molname, self.method.name)
        else:
            fileName = "{}_{}_{}.dat".format(molname, self.method.name, self.method.basis)
        file = subpath / fileName
        if file.exists():
            return
        with file.open("w") as f:
            for key, value in self.getMetadata().items():
                if value is not None:
                    f.write("# {:9s}: {}\n".format(key, value))
            f.write("""
# Initial state Final state Transition Energies (eV) %T1 Oscilator forces unsafe
####################### ####################### ######################################## ############# ####### ################### ##############
# Number Spin Symm Number Spin Symm type E_{:5s} %T1 f is unsafe\n""".format(typename))
            for ex in self.excitations:
                # Missing optional fields are written as "_".
                mystr = " {:7s} {:6s} {:9s} {:7s} {:5s} {:12s} {:39s} {:13s} {:14s} {:13s}{}\n".format(
                    str(ex.initial.number),
                    str(ex.initial.multiplicity),
                    ex.initial.symetry,
                    str(ex.final.number),
                    str(ex.final.multiplicity),
                    ex.final.symetry,
                    "(" + str(ex.type) + ")" if ex.type is not None else "_",
                    str(ex.value) if ex.value is not None else "_",
                    str(ex.T1) if ex.T1 is not None else "_",
                    str(ex.oscilatorForces) if ex.oscilatorForces is not None else "_",
                    str(ex.isUnsafe).lower())
                f.write(mystr)
class method:
    """Computational method: a name and an optional basis set."""

    def __init__(self, name, *args):
        # Only the first extra positional argument is used (as the basis);
        # any further ones are ignored.
        self.name = name
        self.basis = args[0] if len(args) > 0 else None

    @staticmethod
    def fromString(string):
        """Build a method from the ``name[,basis]`` form produced by toDataString."""
        vals = string.split(",")
        return method(*vals)

    def __str__(self):
        """Return ``name/basis``, or just ``name`` when the basis is empty or None."""
        string = self.name
        if self.basis:
            string += '/' + self.basis
        return string

    def toDataString(self):
        """Return ``name,basis``, or just ``name`` when the basis is empty or None."""
        string = self.name
        if self.basis:
            string += "," + self.basis
        return string
class code:
    """Program used for a calculation: a name and a version string."""

    def __init__(self, name, version):
        self.name = name
        self.version = version

    def toDataString(self):
        """Return ``name,version``, or just ``name`` when the version is empty or None."""
        string = self.name
        if self.version:
            string += "," + self.version
        return string
class oneStateDataFileBase(dataFileBase):
    """Data file that additionally carries a single geometry."""

    def __init__(self):
        super(oneStateDataFileBase, self).__init__()
        self.geometry = None

    def getMetadata(self):
        """Return the base metadata plus a ``geom`` entry, with ``DOI`` kept last."""
        dic = super(oneStateDataFileBase, self).getMetadata()
        dic["geom"] = "" if self.geometry is None else self.geometry.toDataString()
        # "geom" was appended after "DOI"; move "DOI" back to the end.
        dic.move_to_end("DOI")
        return dic
class AbsDataFile(oneStateDataFileBase):
    """Data file whose file type is `dataType.ABS`."""

    def __init__(self):
        super(AbsDataFile, self).__init__()

    @staticmethod
    def GetFileType():
        """Return `dataType.ABS`."""
        return dataType.ABS
class FluoDataFile(oneStateDataFileBase):
    """Data file whose file type is `dataType.FLUO`."""

    def __init__(self):
        super(FluoDataFile, self).__init__()

    @staticmethod
    def GetFileType():
        """Return `dataType.FLUO`."""
        return dataType.FLUO
class excitationBase:
    """Transition between an initial and a final state, with optional annotations.

    Recognised keyword arguments: ``type`` (default None), ``T1`` (default
    None) and ``isUnsafe`` (default False). Other keywords are ignored.
    """

    def __init__(self, initial, final, **kwargs):
        self.initial = initial
        self.final = final
        self.type = kwargs.get("type")
        self.T1 = kwargs.get("T1")
        self.isUnsafe = kwargs.get("isUnsafe", False)
class excitationValue(excitationBase):
    """Excitation carrying an energy value and an optional oscillator force.

    In addition to the keywords understood by `excitationBase`, ``forces``
    is stored as ``oscilatorForces`` (default None). ``forces`` and
    ``corrected`` are not forwarded to the base class.
    """

    def __init__(self, initial, final, value, **kwarg):
        supkwarg = {
            key: val for key, val in kwarg.items()
            if key not in ("forces", "corrected")
        }
        super(excitationValue, self).__init__(initial, final, **supkwarg)
        self.value = value
        self.oscilatorForces = kwarg.get("forces")