Merge pull request #126 from TREX-CoE/text-fallback-python

Text fallback when building python API
This commit is contained in:
Anthony Scemama 2023-06-08 17:03:36 +02:00 committed by GitHub
commit 81818a40ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 192 additions and 75 deletions

View File

@ -37,7 +37,7 @@ CLEANFILES = trexio.mod
if HAVE_FORTRAN
BUILT_SOURCES = trexio.mod
else
BUILT_SOURCES =
BUILT_SOURCES =
endif
EXTRA_DIST = .git_hash
@ -261,10 +261,18 @@ pytrexio_i = $(srcdir)/src/pytrexio.i
numpy_i = $(srcdir)/src/numpy.i
if HAVE_HDF5
python-test: $(TEST_PY)
python3 -m pytest -v --all $(TEST_PY)
$(RM) -r -- __pycache__
$(RM) -f -- test_file_py.h5 unsafe_test_file_py.h5
$(RM) -rf -- test_file_py.dir unsafe_test_file_py.dir
else
python-test: $(TEST_PY)
python3 -m pytest -v $(TEST_PY)
$(RM) -r -- __pycache__
$(RM) -f -- test_file_py.h5 unsafe_test_file_py.h5
$(RM) -r -- __pycache__
$(RM) -rf -- test_file_py.dir unsafe_test_file_py.dir
endif
python-install: $(pytrexio_py) $(setup_py) $(setup_cfg)
cd python && \
@ -274,11 +282,15 @@ python-sdist: $(pytrexio_py) $(setup_py) $(setup_cfg)
cd python && \
python3 -m build --sdist
if HAVE_HDF5
$(pytrexio_py): $(pytrexio_c)
cd tools && ./prepare_python.sh
else
$(pytrexio_py): $(pytrexio_c)
cd tools && ./prepare_python.sh --without-hdf5
endif
# Build Python module and C wrapper code for TREXIO using SWIG
# [?] swig -python -threads pytrexio.i ----> Add thread support for all the interface
$(pytrexio_c): $(ORG_FILES) $(GENERATOR_FILES) $(trexio_h) $(pytrexio_i) $(numpy_i)
@if [[ $(SWIG).x != ".x" ]] ; then \
cp $(trexio_h) src/ ; \
@ -307,7 +319,7 @@ DEB_FILES = \
helpers-debian/libtrexio0.install \
helpers-debian/libtrexio-dev.install \
helpers-debian/source \
helpers-debian/README.source
helpers-debian/README.source
debian_from_dist: $(DEB_FILES) $(SOURCES) $(trexio_h)
cp ../trexio-$(PACKAGE_VERSION).tar.gz ../libtrexio_$(PACKAGE_VERSION).orig.tar.gz
@ -317,6 +329,9 @@ debian_from_dist: $(DEB_FILES) $(SOURCES) $(trexio_h)
CLEANFILES += $(pytrexio_c) \
$(pytrexio_py) \
$(trexio_py) \
$(srcdir)/src/trexio.py \
$(srcdir)/src/pytrexio.py \
$(srcdir)/src/trexio.h \
python/src/*.c \
python/src/*.h

View File

@ -4,6 +4,7 @@ include examples/notebooks/*
include examples/README.md
include requirements.txt tools/set_NUMPY_INCLUDEDIR.sh
include test/benzene_data.py
include test/conftest.py
exclude examples/LICENSE
exclude examples/requirements.txt

View File

@ -40,7 +40,7 @@ if numpy_isUndefined and not do_sdist:
rootpath = os.path.dirname(os.path.abspath(__file__))
srcpath = os.path.join(rootpath, 'src')
c_files = ['trexio.c', 'trexio_hdf5.c', 'trexio_text.c', 'pytrexio_wrap.c']
c_files = ['trexio.c', 'trexio_text.c']
with open("README.md", "r") as fh:
@ -61,12 +61,15 @@ if not version_r:
# The block below is needed to derive additional flags related to the HDF5 library,
# which is required to build pytrexio extension module during the setup.py execution
h5_present = False
h5_ldflags_withl = os.environ.get("H5_LDFLAGS", None)
h5_cflags_withI = os.environ.get("H5_CFLAGS", None)
h5_ldflags_isUndefined = h5_ldflags_withl is None or h5_ldflags_withl==""
h5_cflags_isUndefined = h5_cflags_withI is None or h5_cflags_withI==""
h5_present = (not h5_ldflags_isUndefined) & (not h5_cflags_isUndefined)
if (h5_ldflags_isUndefined or h5_cflags_isUndefined) and not do_sdist:
try:
@ -76,31 +79,51 @@ if (h5_ldflags_isUndefined or h5_cflags_isUndefined) and not do_sdist:
try:
assert pk.exists('hdf5')
h5_present = True
except AssertionError:
raise Exception("pkg-config could not locate HDF5")
print("pkg-config could not locate HDF5; installing TREXIO with TEXT back-end only!")
h5_cflags_withI = pk.cflags('hdf5')
h5_ldflags_withl = pk.libs('hdf5')
if h5_present:
h5_cflags_withI = pk.cflags('hdf5')
h5_ldflags_withl = pk.libs('hdf5')
h5_cflags = h5_cflags_withI.replace("-I","").split(" ")[0] if not do_sdist else ""
h5_ldflags = h5_ldflags_withl.split(" ")[0] if not do_sdist else ""
if h5_present:
h5_cflags = h5_cflags_withI.replace("-I","").split(" ")[0] if not do_sdist else ""
h5_ldflags = h5_ldflags_withl.split(" ")[0] if not do_sdist else ""
c_files.append('trexio_hdf5.c')
# ============================ End of the HDF5 block ============================ #
# Define pytrexio extension module based on TREXIO source codes + SWIG-generated wrapper
pytrexio_module = Extension('pytrexio._pytrexio',
c_files.append('pytrexio_wrap.c')
compile_args = [
'-std=c99',
'-Wno-discarded-qualifiers',
'-Wno-unused-variable',
'-Wno-unused-but-set-variable'
]
# if config.h is present then we are building via Autotools
if os.path.isfile(os.path.join(srcpath, "config.h")):
compile_args.append('-DHAVE_CONFIG_H')
# explicit hack needed when building from sdist tarball
if h5_present:
compile_args.append('-DHAVE_HDF5')
# define C extension module
if h5_present:
pytrexio_module = Extension('pytrexio._pytrexio',
sources = [os.path.join(srcpath, code) for code in c_files],
include_dirs = [h5_cflags, srcpath, numpy_includedir],
libraries = ['hdf5' ],
extra_compile_args = [
'-std=c99',
'-Wno-discarded-qualifiers',
'-Wno-unused-variable',
'-Wno-unused-but-set-variable'
],
extra_compile_args = compile_args,
extra_link_args = [h5_ldflags]
)
else:
pytrexio_module = Extension('pytrexio._pytrexio',
sources = [os.path.join(srcpath, code) for code in c_files],
include_dirs = [srcpath, numpy_includedir],
extra_compile_args = compile_args
)
setup(name = 'trexio',

17
python/test/conftest.py Normal file
View File

@ -0,0 +1,17 @@
# content of conftest.py
import pytest
def pytest_addoption(parser):
    """Register the custom ``--all`` command-line switch.

    Parameters
    ----------
    parser
        Object supplied by pytest; only its ``addoption`` method is used here.

    The switch is a plain boolean flag (``store_true``), so it is falsy
    unless ``--all`` is given on the command line.
    """
    parser.addoption(
        "--all",
        action="store_true",
        help="Run all back-ends",
    )
def pytest_generate_tests(metafunc):
    """Parametrize the ``backend`` fixture for every test that requests it.

    Tests that do not list ``backend`` among their fixture names are left
    untouched.  Otherwise the test is generated once per back-end name:
    ``'hdf5'`` and ``'text'`` when the ``all`` option is set, and ``'text'``
    only when it is not.
    """
    if "backend" not in metafunc.fixturenames:
        return

    if metafunc.config.getoption("all"):
        backends = ['hdf5', 'text']
    else:
        backends = ['text']
    metafunc.parametrize("backend", backends)

View File

@ -6,19 +6,22 @@ import trexio
from benzene_data import *
FILENAME = 'test_file_py.h5'
BACK_END = trexio.TREXIO_HDF5
def clean():
def clean(back_end, filename):
"""Remove test files."""
import os
try:
os.remove(FILENAME)
os.remove('unsafe_' + FILENAME)
except FileNotFoundError:
pass
if back_end == trexio.TREXIO_HDF5:
import os
try:
os.remove(filename)
os.remove('unsafe_' + filename)
except FileNotFoundError:
pass
else:
import shutil
try:
shutil.rmtree(filename)
shutil.rmtree('unsafe_' + filename)
except FileNotFoundError:
pass
def test_info():
@ -29,7 +32,7 @@ def test_info():
def test_void():
"""Check raise of an error upon I/O on non-existing file."""
with pytest.raises(trexio.Error):
_ = trexio.File('void.file', 'r', BACK_END)
_ = trexio.File('void.file', 'r', trexio.TREXIO_TEXT)
def test_orbital_list():
@ -53,20 +56,30 @@ def test_bitfield_list():
class TestIO:
"""Unit tests for writing/reading different blocks of the TREXIO file."""
filename = FILENAME
back_end = BACK_END
mode = 'w'
test_file = None
clean()
@pytest.fixture(autouse=True)
def setup(self, backend):
self.mode = 'w'
self.test_file = None
if backend == 'hdf5':
self.back_end = trexio.TREXIO_HDF5
self.filename = 'test_file_py.h5'
elif backend == 'text':
self.back_end = trexio.TREXIO_TEXT
self.filename = 'test_file_py.dir'
else:
raise ValueError("Wrong TREXIO back-end supplied to pytest.")
def __del__(self):
if self.test_file:
if self.test_file.isOpen:
self.test_file.close()
def test_clean(self):
"""Clean existing files."""
clean(self.back_end, self.filename)
#def __del__(self):
# """Class destructor."""
# if self.test_file:
# if self.test_file.isOpen:
# self.test_file.close()
def open(self, filename=None, mode=None, back_end=None):
"""Create a TREXIO file and open it for writing."""
if not filename:
@ -107,12 +120,17 @@ class TestIO:
with pytest.raises(trexio.Error):
trexio.write_nucleus_num(self.test_file, nucleus_num * 2)
if self.test_file.isOpen:
self.test_file.close()
def test_num(self):
    """Write a number."""
    self.open()
    # try/finally guarantees the file is closed even when the write or the
    # assertion fails, so a failing test does not leave the file open.
    try:
        trexio.write_nucleus_num(self.test_file, nucleus_num)
        assert trexio.has_nucleus_num(self.test_file)
    finally:
        if self.test_file.isOpen:
            self.test_file.close()
def test_str(self):
@ -120,6 +138,8 @@ class TestIO:
self.open()
trexio.write_nucleus_point_group(self.test_file, point_group)
assert trexio.has_nucleus_point_group(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_array_str(self):
@ -129,6 +149,8 @@ class TestIO:
self.test_num()
trexio.write_nucleus_label(self.test_file, nucleus_label)
assert trexio.has_nucleus_label(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_array_1D(self):
@ -138,6 +160,8 @@ class TestIO:
self.test_num()
trexio.write_nucleus_charge(self.test_file, nucleus_charge)
assert trexio.has_nucleus_charge(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_array_2D(self):
@ -147,6 +171,8 @@ class TestIO:
self.test_num()
trexio.write_nucleus_coord(self.test_file, nucleus_coord)
assert trexio.has_nucleus_coord(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_indices(self):
@ -159,6 +185,8 @@ class TestIO:
# now write the indices
trexio.write_basis_nucleus_index(self.test_file, indices_np)
assert trexio.has_basis_nucleus_index(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_sparse(self):
@ -170,6 +198,8 @@ class TestIO:
offset = 0
trexio.write_ao_2e_int_eri(self.test_file, offset, num_integrals, indices, values)
assert trexio.has_ao_2e_int_eri(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_determinant(self):
@ -189,31 +219,39 @@ class TestIO:
assert trexio.has_determinant_coefficient(self.test_file)
# manually check the consistency between coefficient_size and number of determinants
assert trexio.read_determinant_coefficient_size(self.test_file) == trexio.read_determinant_num(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_delete_group(self):
"""Delete a group."""
self.open(filename='unsafe_' + self.filename, mode='u', back_end=self.back_end)
self.test_num()
self.test_array_1D()
self.test_array_2D()
trexio.write_nucleus_num(self.test_file, nucleus_num)
trexio.write_nucleus_charge(self.test_file, nucleus_charge)
trexio.flush(self.test_file)
assert trexio.has_nucleus_num(self.test_file)
assert trexio.has_nucleus_charge(self.test_file)
assert trexio.has_nucleus(self.test_file)
trexio.delete_nucleus(self.test_file)
assert not trexio.has_nucleus_num(self.test_file)
assert not trexio.has_nucleus_charge(self.test_file)
assert not trexio.has_nucleus_coord(self.test_file)
assert not trexio.has_nucleus(self.test_file)
if self.test_file.isOpen:
self.test_file.close()
def test_has_group(self):
    """Check existence of a group."""
    self.open()
    # Close the file in a finally clause so that a failed assertion does not
    # leave the file open.
    try:
        assert trexio.has_nucleus(self.test_file)
        assert not trexio.has_rdm(self.test_file)
    finally:
        if self.test_file.isOpen:
            self.test_file.close()
def test_context_manager(self):
@ -231,6 +269,8 @@ class TestIO:
self.open(mode='r')
num_r = trexio.read_nucleus_num(self.test_file)
assert num_r == nucleus_num
if self.test_file.isOpen:
self.test_file.close()
def test_read_array_1D(self):
@ -240,6 +280,8 @@ class TestIO:
assert charges_np_r.dtype is np.dtype(np.float64)
assert charges_np_r.size == nucleus_num
np.testing.assert_array_almost_equal(charges_np_r, np.array(nucleus_charge), decimal=8)
if self.test_file.isOpen:
self.test_file.close()
def test_read_array_2D(self):
@ -250,6 +292,8 @@ class TestIO:
assert coords_np.dtype is np.dtype(np.float64)
assert coords_np.size == nucleus_num * 3
np.testing.assert_array_almost_equal(coords_np, np.array(nucleus_coord).reshape(nucleus_num,3), decimal=8)
if self.test_file.isOpen:
self.test_file.close()
def test_read_errors(self):
@ -258,6 +302,8 @@ class TestIO:
# unsafe call to read_safe should fail with error message corresponding to TREXIO_UNSAFE_ARRAY_DIM
with pytest.raises(trexio.Error):
_ = trexio.read_nucleus_charge(self.test_file, dim=nucleus_num/2)
if self.test_file.isOpen:
self.test_file.close()
def test_read_integers(self):
@ -276,7 +322,8 @@ class TestIO:
assert indices_np_64.dtype is np.dtype(np.int64)
assert indices_np_64.size == basis_shell_num
assert (indices_np_64 == np.array(nucleus_index)).all()
if self.test_file.isOpen:
self.test_file.close()
def test_sparse_read(self):
"""Read a sparse array."""
@ -300,7 +347,8 @@ class TestIO:
assert read_buf_size == (num_integrals - buf_size)
assert indices_sparse_np[0][0] == offset_file * 4
assert indices_sparse_np[read_buf_size-1][3] == (offset_file + read_buf_size) * 4 - 1
if self.test_file.isOpen:
self.test_file.close()
def test_determinant_read(self):
"""Read the CI determinants."""
@ -320,7 +368,8 @@ class TestIO:
#print(f'First complete read of determinant coefficients: {read_buf_size}')
assert not eof
assert read_buf_size == buf_size
if self.test_file.isOpen:
self.test_file.close()
def test_array_str_read(self):
"""Read an array of strings."""
@ -328,6 +377,8 @@ class TestIO:
labels_r = trexio.read_nucleus_label(self.test_file)
assert len(labels_r) == nucleus_num
assert labels_r == nucleus_label
if self.test_file.isOpen:
self.test_file.close()
def test_str_read(self):
@ -335,3 +386,5 @@ class TestIO:
self.open(mode='r')
point_group_r = trexio.read_nucleus_point_group(self.test_file)
assert point_group_r == point_group
if self.test_file.isOpen:
self.test_file.close()

View File

@ -853,6 +853,14 @@ class File:
return self.state
def flush(self):
"""Flush the data on disk."""
rc = pytr.trexio_flush(self.pytrexio_s)
if rc != TREXIO_SUCCESS:
raise Error(rc)
def inquire(self):
"""Inquire whether a TREXIO file exists."""
self.exists = _inquire(self.filename)
@ -1506,7 +1514,7 @@ def flush(trexio_file):
Parameter is a ~trexio_file~ object that has been created by a call to ~open~ function.
"""
rc = pytr.trexio_flush(trexio_file)
rc = pytr.trexio_flush(trexio_file.pytrexio_s)
if rc != TREXIO_SUCCESS:
raise Error(rc)
#+end_src
@ -2139,7 +2147,7 @@ trexio_read_$group_num$_64 (trexio_t* const file, $group_num_dtype_double$* cons
if (trexio_has_$group_num$(file) != TREXIO_SUCCESS) return TREXIO_ATTR_MISSING;
trexio_exit_code rc = TREXIO_GROUP_READ_ERROR;
switch (file->back_end) {
case TREXIO_TEXT:
@ -3370,7 +3378,7 @@ trexio_read_$group_dset$(trexio_t* const file,
/* Find the maximal value along all dimensions to define the compression technique in the back end */
int64_t max_dim = unique_dims[0];
#if (unique_rank != 1)
#if (unique_rank != 1)
for (uint32_t i = 1; i < unique_rank; i++) {
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
}
@ -3503,7 +3511,7 @@ trexio_write_$group_dset$(trexio_t* const file,
/* Find the maximal value along all dimensions to define the compression technique in the back end */
int64_t max_dim = unique_dims[0];
#if (unique_rank != 1)
#if (unique_rank != 1)
for (uint32_t i = 1; i < unique_rank; i++) {
if (unique_dims[i] > max_dim) max_dim = unique_dims[i];
}

View File

@ -1,5 +1,4 @@
#define TEST_BACKEND_TEXT
#define TEST_BACKEND_HDF5
#define TREXIO_FILE_PREFIX "io_dset_float"
#include "test_macros.h"
#include "io_dset_float.c"

View File

@ -2,4 +2,3 @@
#define TREXIO_FILE_PREFIX "io_dset_float"
#include "test_macros.h"
#include "io_dset_float.c"

View File

@ -1,4 +1,4 @@
#define TEST_BACKEND_TEXT
#define TEST_BACKEND_HDF5
#define TREXIO_FILE_PREFIX "io_dset_int"
#include "test_macros.h"
#include "io_dset_int.c"

View File

@ -2,4 +2,3 @@
#define TREXIO_FILE_PREFIX "io_dset_int"
#include "test_macros.h"
#include "io_dset_int.c"

View File

@ -1,11 +1,22 @@
#!/bin/bash
# We want the script to crash on the 1st error:
set -e
# Check that script is executed from tools directory
if [[ $(basename $PWD) != "tools" ]] ; then
echo "This script should run in the tools directory"
exit -1
fi
# NOTE: DO_HDF5 is an inverted flag. 0 means the HDF5 back end IS included
# (trexio_hdf5.{c,h} are copied further down when it equals 0); 1 means the
# HDF5 back end is left out, as requested with --without-hdf5.
DO_HDF5=0
# The previous test combined `-z ${1}` (argument is empty) with an equality
# check against "--without-hdf5"; both can never hold at once, so the option
# was silently ignored. "${1:-}" is empty when no argument is passed, so the
# equality check alone is sufficient.
if [[ "${1:-}" == "--without-hdf5" ]] ; then
  echo "Compiling Python API without the HDF5 back end."
  DO_HDF5=1
else
  echo "Compiling Python API with the HDF5 back end."
fi
TREXIO_ROOT=$(dirname "${PWD}../")
# First define readonly global variables.
@ -14,9 +25,7 @@ readonly INCLUDIR=${TREXIO_ROOT}/include
readonly TOOLS=${TREXIO_ROOT}/tools
readonly PYDIR=${TREXIO_ROOT}/python
readonly PYTREXIODIR=${PYDIR}/pytrexio
# We want the script to crash on the 1st error:
set -e
readonly PYDIR_TREXIO_H=${PYDIR}/src/trexio.h
# Create src and trexio directories in the python folder if not yet done
mkdir -p ${PYDIR}/src
@ -25,22 +34,16 @@ mkdir -p ${PYTREXIODIR}
# Copy all the source code and header files in the corresponding python directory
cp ${SRC}/pytrexio.py ${PYTREXIODIR}/pytrexio.py
cp ${SRC}/trexio.py ${PYDIR}/trexio.py
cp ${SRC}/*.c ${PYDIR}/src
cp ${SRC}/*.h ${PYDIR}/src
cp ${INCLUDIR}/trexio.h ${PYDIR}/src
cp ${SRC}/trexio.c ${SRC}/trexio_s.h ${SRC}/trexio_private.h ${PYDIR}/src
cp ${SRC}/trexio_text.{c,h} ${PYDIR}/src
cp ${SRC}/pytrexio_wrap.c ${PYDIR}/src/pytrexio_wrap.c
cp ${INCLUDIR}/trexio.h ${PYDIR}/src
cp ${INCLUDIR}/config.h ${PYDIR}/src
# fix needed to define HAVE_HDF5 symbol so that Python extension is always compiled with HDF5 (without including config.h)
# add "#define HAVE_HDF5 1" line after "#include stdint.h" using awk and sed
export LINE_NO=$(($(awk '/stdint.h/{print NR}' ${PYDIR}/src/trexio.h) + 1))
# sed on MacOS is different from GNU sed on Linux and requires special treatment
if [[ "$OSTYPE" == "darwin"* ]]; then
sed -i '' -e "$LINE_NO"'i \
#define HAVE_HDF5 1' "${PYDIR}/src/trexio.h"
else
sed -i -e "$LINE_NO"'i \
#define HAVE_HDF5 1' "${PYDIR}/src/trexio.h"
if [[ ${DO_HDF5} == 0 ]] ; then
cp ${SRC}/trexio_hdf5.{c,h} ${PYDIR}/src
fi
# Copy additional info
cp ${TREXIO_ROOT}/AUTHORS ${TREXIO_ROOT}/LICENSE ${PYDIR}