mirror of
https://github.com/LCPQ/QUESTDB_website.git
synced 2025-01-12 05:58:23 +01:00
Add several options to the datafilebuilder input: exclude columns, set defaultBasis (for formats that need it), and add metadata such as geometry or set
This commit is contained in:
parent
c3f18d575e
commit
d13096959c
@ -28,7 +28,7 @@ elif args.file!=None:
|
||||
else:
|
||||
texOps=dfbOptions()
|
||||
commands=[LaTeX.newCommand(cmd) for cmd in soup.find_all("newcommand")]
|
||||
dat=LaTeX.tabularToData(soup.tabular,commands)
|
||||
dat=LaTeX.tabularToData(soup.tabular,commands,texOps.excludeColumns)
|
||||
scriptpath=Path(sys.argv[0]).resolve()
|
||||
datapath=scriptpath.parents[1]/"static"/"data"
|
||||
if args.debug:
|
||||
|
@ -4,6 +4,7 @@ from .utils import *
|
||||
import itertools
|
||||
from enum import Enum
|
||||
from abc import ABCMeta
|
||||
|
||||
class commandBase(metaclass=ABCMeta):
|
||||
def __init__(self,source,supportedTexType):
|
||||
if not (source.name==supportedTexType):
|
||||
@ -15,7 +16,6 @@ class commandBase(metaclass=ABCMeta):
|
||||
|
||||
def __repr__(self):
|
||||
return self.source.__repr__()
|
||||
|
||||
class newCommand(commandBase):
|
||||
def __init__(self,source):
|
||||
super(newCommand, self).__init__(source,"newcommand")
|
||||
@ -96,7 +96,7 @@ class multiColumn(commandBase):
|
||||
@property
|
||||
def contents(self):
|
||||
return nodify(list(self.source.args[2].contents))
|
||||
def tabularToData(table,commands=None):
|
||||
def tabularToData(table,commands=None,excludeColumn=set()):
|
||||
if table.name=="tabular":
|
||||
ctable=str(table)
|
||||
ctable=ctable.split("\n")
|
||||
@ -136,6 +136,7 @@ def tabularToData(table,commands=None):
|
||||
raise ValueError("This tabular is not supported because lines have not the same column numbers for each row the coulumns numbers are {}".format(lens))
|
||||
import numpy as np
|
||||
table=np.array(lnewtable,TexNode)
|
||||
table=np.delete(table,list(excludeColumn),1)
|
||||
return table
|
||||
else:
|
||||
raise ValueError("Only tabular LaTeX environment is supported")
|
||||
|
@ -1,6 +1,6 @@
|
||||
from TexSoup import TexSoup,TexCmd
|
||||
from . import formats
|
||||
from .data import dataFileBase,DataType,state
|
||||
from .data import dataFileBase,DataType, method, state, exSet
|
||||
from collections import defaultdict
|
||||
|
||||
class dfbOptions(object):
|
||||
@ -10,6 +10,10 @@ class dfbOptions(object):
|
||||
self.suffix=None
|
||||
self.initialStates=defaultdict(lambda : state(1,1,"A_1"))
|
||||
self.isDouble=False
|
||||
self.defaultBasis="aug-cc-pVTZ"
|
||||
self.geometries = defaultdict(lambda :method("CC3","aug-cc-pVTZ"))
|
||||
self.set = ''
|
||||
self.excludeColumns=set()
|
||||
@staticmethod
|
||||
def readFromEnv(lateEnv):
|
||||
dfb_Opt=dfbOptions()
|
||||
@ -37,8 +41,8 @@ class dfbOptions(object):
|
||||
vRArgs=[arg.value for arg in initialState.args if arg.type=="required"]
|
||||
vOArgs=[arg.value for arg in initialState.args if arg.type=="optional"]
|
||||
if len(vOArgs)==0:
|
||||
defaultstate=state.fromString("1 "+vRArgs[0])
|
||||
dfb_Opt.initialStates.default_factory=lambda : defaultstate
|
||||
defaultgeometry=state.fromString("1 "+vRArgs[0])
|
||||
dfb_Opt.initialStates.default_factory=lambda : defaultgeometry
|
||||
else:
|
||||
mystate=state.fromString("1 "+vRArgs[0])
|
||||
dfb_Opt.initialStates[vOArgs[0]]=mystate
|
||||
@ -58,4 +62,41 @@ class dfbOptions(object):
|
||||
raise ValueError("\isDouble must be 'true' or 'false'.")
|
||||
else:
|
||||
raise ValueError("Arguments error on '\isDouble'. Only one optional argument is supported.")
|
||||
dfbDefaultBasisNode=lateEnv.defaultBasis
|
||||
if dfbDefaultBasisNode!=None:
|
||||
dfbDefaultBasis=dfbDefaultBasisNode.expr
|
||||
if type(dfbDefaultBasis) is TexCmd:
|
||||
dfb_Opt.defaultBasis=dfbDefaultBasis.args[0].value
|
||||
dfbgeometryNodes=list(lateEnv.find_all("geometry"))
|
||||
for node in dfbgeometryNodes:
|
||||
geometry=node.expr
|
||||
if type(geometry) is TexCmd:
|
||||
vRArgs=[arg.value for arg in geometry.args if arg.type=="required"]
|
||||
vOArgs=[arg.value for arg in geometry.args if arg.type=="optional"]
|
||||
if len(vOArgs)==0:
|
||||
defaultgeometry=method(vRArgs[0],vRArgs[1])
|
||||
dfb_Opt.geometries.default_factory=lambda : defaultgeometry
|
||||
else:
|
||||
mygeometry=method(vRArgs[0],vRArgs[1])
|
||||
dfb_Opt.geometries[vOArgs[0]]=mygeometry
|
||||
dfbSetNode=lateEnv.set
|
||||
if dfbSetNode!=None:
|
||||
dfbSet=dfbSetNode.expr
|
||||
if type(dfbSet) is TexCmd:
|
||||
setname=dfbSet.args[0].value
|
||||
index=dfbSet.args[1].value
|
||||
dfb_Opt.set=exSet(setname,index=int(index))
|
||||
else:
|
||||
dfbStrSetNode=lateEnv.strSet
|
||||
if dfbStrSetNode!=None:
|
||||
dfbStrSet=dfbStrSetNode.expr
|
||||
if type(dfbStrSet) is TexCmd:
|
||||
dfb_Opt.set=setname=dfbStrSet.args[0].value
|
||||
dfbexcludeColumnsNodes=list(lateEnv.find_all("excludecolumns"))
|
||||
for node in dfbexcludeColumnsNodes:
|
||||
excludeColumns=node.expr
|
||||
if type(excludeColumns) is TexCmd:
|
||||
commas_string=excludeColumns.args[0].value
|
||||
ints=[int(x.strip()) for x in commas_string.split(",")]
|
||||
dfb_Opt.excludeColumns.update(ints)
|
||||
return dfb_Opt
|
@ -4,14 +4,14 @@ from ...data import dataFileBase,DataType,method,excitationValue,datafileSelecto
|
||||
from ...utils import getValFromCell, checkFloat
|
||||
@formatName("TBE")
|
||||
class TBEHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
subtablesRange=getSubtablesRange(table)
|
||||
for myrange in subtablesRange:
|
||||
datacls=dict()
|
||||
mymolecule=str(table[myrange[0],0])
|
||||
initialState=self.TexOps.initialStates[mymolecule]
|
||||
mymethod=(method("TBE","aug-cc-pVTZ"),method("TBE(Full)","CBS"))
|
||||
mymethod=(method("TBE",self.TexOps.defaultBasis),method("TBE(Full)","CBS"))
|
||||
finsts=dataFileBase.convertState(table[myrange,1],initialState,default=self.TexOps.defaultType,commands=self.Commands)
|
||||
for index,row in enumerate(table[myrange,]):
|
||||
oscilatorForces=checkFloat(str(row[2]))
|
||||
|
@ -5,7 +5,7 @@ from ...utils import getValFromCell
|
||||
import numpy as np
|
||||
@formatName("column")
|
||||
class columnHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
subtablesRange=getSubtablesRange(table)
|
||||
for myrange in subtablesRange:
|
||||
|
@ -8,7 +8,7 @@ import numpy as np
|
||||
from ...utils import getValFromCell
|
||||
@formatName("doubleColumn")
|
||||
class doubleColumnHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
datacls=dict()
|
||||
subtablesMol=getSubtablesRange(table)
|
||||
|
@ -7,7 +7,7 @@ from TexSoup import TexSoup
|
||||
import re
|
||||
@formatName("doubleTBE")
|
||||
class doubleTBEHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
subtablesMol=getSubtablesRange(table)
|
||||
for rangeMol in subtablesMol:
|
||||
|
@ -8,7 +8,7 @@ import numpy as np
|
||||
import json
|
||||
@formatName("exoticColumn")
|
||||
class exoticColumnHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
subtablesRange=getSubtablesRange(table)
|
||||
for myrange in subtablesRange:
|
||||
|
@ -56,7 +56,7 @@ class fromXLSToLaTeXHandler(formatHandlerBase):
|
||||
else:
|
||||
return resultstr
|
||||
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
subtablesRange=getSubtablesRange(table,firstindex=1,column=1)
|
||||
for myrange in subtablesRange:
|
||||
@ -65,7 +65,7 @@ class fromXLSToLaTeXHandler(formatHandlerBase):
|
||||
initialState=self.TexOps.initialStates[mymolecule]
|
||||
for col in itertools.chain(range(8,11), range(14,np.size(table,1))):
|
||||
col=table[:,col]
|
||||
basis="aug-cc-pVTZ"
|
||||
basis=self.TexOps.defaultBasis
|
||||
mymethcell=list(col[0])
|
||||
if len(mymethcell)==0:
|
||||
continue
|
||||
|
@ -5,7 +5,7 @@ from ...utils import getValFromCell
|
||||
import numpy as np
|
||||
@formatName("line")
|
||||
class lineHandler(formatHandlerBase):
|
||||
def readFromTable(self,table):
|
||||
def _readFromTableCore(self,table):
|
||||
datalist=list()
|
||||
for col in range(1,np.size(table,1)):
|
||||
col=table[:,col]
|
||||
|
@ -4,5 +4,15 @@ class formatHandlerBase(object, metaclass=ABCMeta):
|
||||
self.TexOps=TexOps
|
||||
self.Commands=commands
|
||||
@abstractmethod
|
||||
def _readFromTableCore(self,table):
|
||||
raise NotImplementedError()
|
||||
|
||||
def readFromTable(self,table):
|
||||
raise NotImplementedError()
|
||||
dataFiles=self._readFromTableCore(table)
|
||||
for file in dataFiles:
|
||||
self.__applyMetadataFromOptions(file)
|
||||
return dataFiles
|
||||
def __applyMetadataFromOptions(self,file):
|
||||
if hasattr(file, "geometry"):
|
||||
file.geometry = self.TexOps.geometries[file.molecule]
|
||||
file.set = self.TexOps.set
|
||||
|
Loading…
Reference in New Issue
Block a user