10
0
mirror of https://github.com/LCPQ/QUESTDB_website.git synced 2024-06-02 11:25:34 +02:00

Add several options to the datafilebuilder input: exclude columns, set defaultBasis (for formats that need it), and attach metadata such as geometry or set

This commit is contained in:
Mickaël Véril 2021-11-09 10:34:48 +01:00 committed by Anthony Scemama
parent c3f18d575e
commit d13096959c
11 changed files with 68 additions and 16 deletions

View File

@ -28,7 +28,7 @@ elif args.file!=None:
else:
texOps=dfbOptions()
commands=[LaTeX.newCommand(cmd) for cmd in soup.find_all("newcommand")]
dat=LaTeX.tabularToData(soup.tabular,commands)
dat=LaTeX.tabularToData(soup.tabular,commands,texOps.excludeColumns)
scriptpath=Path(sys.argv[0]).resolve()
datapath=scriptpath.parents[1]/"static"/"data"
if args.debug:

View File

@ -4,6 +4,7 @@ from .utils import *
import itertools
from enum import Enum
from abc import ABCMeta
class commandBase(metaclass=ABCMeta):
def __init__(self,source,supportedTexType):
if not (source.name==supportedTexType):
@ -15,7 +16,6 @@ class commandBase(metaclass=ABCMeta):
def __repr__(self):
return self.source.__repr__()
class newCommand(commandBase):
def __init__(self,source):
super(newCommand, self).__init__(source,"newcommand")
@ -96,7 +96,7 @@ class multiColumn(commandBase):
@property
def contents(self):
return nodify(list(self.source.args[2].contents))
def tabularToData(table,commands=None):
def tabularToData(table,commands=None,excludeColumn=set()):
if table.name=="tabular":
ctable=str(table)
ctable=ctable.split("\n")
@ -136,6 +136,7 @@ def tabularToData(table,commands=None):
raise ValueError("This tabular is not supported because lines have not the same column numbers for each row the coulumns numbers are {}".format(lens))
import numpy as np
table=np.array(lnewtable,TexNode)
table=np.delete(table,list(excludeColumn),1)
return table
else:
raise ValueError("Only tabular LaTeX environment is supported")

View File

@ -1,6 +1,6 @@
from TexSoup import TexSoup,TexCmd
from . import formats
from .data import dataFileBase,DataType,state
from .data import dataFileBase,DataType, method, state, exSet
from collections import defaultdict
class dfbOptions(object):
@ -10,6 +10,10 @@ class dfbOptions(object):
self.suffix=None
self.initialStates=defaultdict(lambda : state(1,1,"A_1"))
self.isDouble=False
self.defaultBasis="aug-cc-pVTZ"
self.geometries = defaultdict(lambda :method("CC3","aug-cc-pVTZ"))
self.set = ''
self.excludeColumns=set()
@staticmethod
def readFromEnv(lateEnv):
dfb_Opt=dfbOptions()
@ -37,8 +41,8 @@ class dfbOptions(object):
vRArgs=[arg.value for arg in initialState.args if arg.type=="required"]
vOArgs=[arg.value for arg in initialState.args if arg.type=="optional"]
if len(vOArgs)==0:
defaultstate=state.fromString("1 "+vRArgs[0])
dfb_Opt.initialStates.default_factory=lambda : defaultstate
defaultgeometry=state.fromString("1 "+vRArgs[0])
dfb_Opt.initialStates.default_factory=lambda : defaultgeometry
else:
mystate=state.fromString("1 "+vRArgs[0])
dfb_Opt.initialStates[vOArgs[0]]=mystate
@ -58,4 +62,41 @@ class dfbOptions(object):
raise ValueError("\isDouble must be 'true' or 'false'.")
else:
raise ValueError("Arguments error on '\isDouble'. Only one optional argument is supported.")
dfbDefaultBasisNode=lateEnv.defaultBasis
if dfbDefaultBasisNode!=None:
dfbDefaultBasis=dfbDefaultBasisNode.expr
if type(dfbDefaultBasis) is TexCmd:
dfb_Opt.defaultBasis=dfbDefaultBasis.args[0].value
# Parse every \geometry command found in the options environment.
#   \geometry{<method>}{<basis>}           -> becomes the default geometry
#   \geometry[<molecule>]{<method>}{<basis>} -> geometry for that molecule only
# The two required arguments are passed positionally to method(...).
dfbgeometryNodes=list(lateEnv.find_all("geometry"))
for node in dfbgeometryNodes:
    geometry=node.expr
    if type(geometry) is TexCmd:
        vRArgs=[arg.value for arg in geometry.args if arg.type=="required"]
        vOArgs=[arg.value for arg in geometry.args if arg.type=="optional"]
        geometryMethod=method(vRArgs[0],vRArgs[1])
        if len(vOArgs)==0:
            # Do NOT store this in a local called `defaultgeometry`: the
            # \initialState handling earlier in this function installs
            # `lambda : defaultgeometry` as the initialStates factory, and
            # closures look the name up when called. Rebinding that name
            # here would make the initialStates default return a method
            # instead of a state. Binding the value through a lambda default
            # argument freezes it at definition time.
            dfb_Opt.geometries.default_factory=lambda geom=geometryMethod: geom
        else:
            dfb_Opt.geometries[vOArgs[0]]=geometryMethod
dfbSetNode=lateEnv.set
if dfbSetNode!=None:
dfbSet=dfbSetNode.expr
if type(dfbSet) is TexCmd:
setname=dfbSet.args[0].value
index=dfbSet.args[1].value
dfb_Opt.set=exSet(setname,index=int(index))
else:
dfbStrSetNode=lateEnv.strSet
if dfbStrSetNode!=None:
dfbStrSet=dfbStrSetNode.expr
if type(dfbStrSet) is TexCmd:
dfb_Opt.set=setname=dfbStrSet.args[0].value
dfbexcludeColumnsNodes=list(lateEnv.find_all("excludecolumns"))
for node in dfbexcludeColumnsNodes:
excludeColumns=node.expr
if type(excludeColumns) is TexCmd:
commas_string=excludeColumns.args[0].value
ints=[int(x.strip()) for x in commas_string.split(",")]
dfb_Opt.excludeColumns.update(ints)
return dfb_Opt

View File

@ -4,14 +4,14 @@ from ...data import dataFileBase,DataType,method,excitationValue,datafileSelecto
from ...utils import getValFromCell, checkFloat
@formatName("TBE")
class TBEHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
subtablesRange=getSubtablesRange(table)
for myrange in subtablesRange:
datacls=dict()
mymolecule=str(table[myrange[0],0])
initialState=self.TexOps.initialStates[mymolecule]
mymethod=(method("TBE","aug-cc-pVTZ"),method("TBE(Full)","CBS"))
mymethod=(method("TBE",self.TexOps.defaultBasis),method("TBE(Full)","CBS"))
finsts=dataFileBase.convertState(table[myrange,1],initialState,default=self.TexOps.defaultType,commands=self.Commands)
for index,row in enumerate(table[myrange,]):
oscilatorForces=checkFloat(str(row[2]))

View File

@ -5,7 +5,7 @@ from ...utils import getValFromCell
import numpy as np
@formatName("column")
class columnHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
subtablesRange=getSubtablesRange(table)
for myrange in subtablesRange:

View File

@ -8,7 +8,7 @@ import numpy as np
from ...utils import getValFromCell
@formatName("doubleColumn")
class doubleColumnHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
datacls=dict()
subtablesMol=getSubtablesRange(table)

View File

@ -7,7 +7,7 @@ from TexSoup import TexSoup
import re
@formatName("doubleTBE")
class doubleTBEHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
subtablesMol=getSubtablesRange(table)
for rangeMol in subtablesMol:

View File

@ -8,7 +8,7 @@ import numpy as np
import json
@formatName("exoticColumn")
class exoticColumnHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
subtablesRange=getSubtablesRange(table)
for myrange in subtablesRange:

View File

@ -56,7 +56,7 @@ class fromXLSToLaTeXHandler(formatHandlerBase):
else:
return resultstr
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
subtablesRange=getSubtablesRange(table,firstindex=1,column=1)
for myrange in subtablesRange:
@ -65,7 +65,7 @@ class fromXLSToLaTeXHandler(formatHandlerBase):
initialState=self.TexOps.initialStates[mymolecule]
for col in itertools.chain(range(8,11), range(14,np.size(table,1))):
col=table[:,col]
basis="aug-cc-pVTZ"
basis=self.TexOps.defaultBasis
mymethcell=list(col[0])
if len(mymethcell)==0:
continue

View File

@ -5,7 +5,7 @@ from ...utils import getValFromCell
import numpy as np
@formatName("line")
class lineHandler(formatHandlerBase):
def readFromTable(self,table):
def _readFromTableCore(self,table):
datalist=list()
for col in range(1,np.size(table,1)):
col=table[:,col]

View File

@ -4,5 +4,15 @@ class formatHandlerBase(object, metaclass=ABCMeta):
self.TexOps=TexOps
self.Commands=commands
# Abstract parsing hook. The format handlers touched by this change define
# _readFromTableCore(self, table); readFromTable calls it and iterates over
# the value it returns. The base version only raises NotImplementedError.
@abstractmethod
def _readFromTableCore(self,table):
raise NotImplementedError()
# Public entry point: delegates the parsing of `table` to
# _readFromTableCore, passes every returned data file through
# __applyMetadataFromOptions, and returns the same collection.
def readFromTable(self,table):
# NOTE(review): this `raise` looks like the pre-change body that the
# flattened diff view kept (the hunk header reads -4,5 +4,15). Taken
# literally it would make the lines below unreachable; confirm against the
# committed file.
raise NotImplementedError()
dataFiles=self._readFromTableCore(table)
for file in dataFiles:
self.__applyMetadataFromOptions(file)
return dataFiles
# Copies metadata held by self.TexOps onto one data file object.
# The double-underscore name is mangled per class, so this helper is only
# reachable through that mangled name from inside the defining class.
def __applyMetadataFromOptions(self,file):
# Only objects that already expose a `geometry` attribute are given one;
# the value is looked up in TexOps.geometries by the file's molecule.
if hasattr(file, "geometry"):
file.geometry = self.TexOps.geometries[file.molecule]
# NOTE(review): indentation is lost in this view, so it is unclear whether
# the assignment below sits inside the hasattr guard or runs for every
# file; confirm against the committed file.
file.set = self.TexOps.set