1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2025-01-05 11:00:30 +01:00

add typemaps and tests for numpy int arrays

+ some cleaning in the Python test and SWIG interface files
This commit is contained in:
q-posev 2021-08-06 14:04:29 +03:00
parent 1388ddef80
commit 808adc7cd8
2 changed files with 74 additions and 87 deletions

View File

@ -1,13 +1,12 @@
import os import os
import shutil import shutil
#import numpy as np import numpy as np
from pytrexio import * from pytrexio import *
# TODO: make a user-friendly more pythonic API that will have to be autogenerated # TODO:
# add Exception handling # 1) make a user-friendly more pythonic API that will have to be autogenerated
# check of dimensions and call to safe API # 2) add Exception handling (can be done easily in the front end python-ic API e.g. try: if function_call(...) == TREXIO_SUCCESS)
# conversion to and from numpy arrays
# automatically download (hopefully the latest version) numpy.i using # automatically download (hopefully the latest version) numpy.i using
# wget https://raw.githubusercontent.com/numpy/numpy/main/tools/swig/numpy.i # wget https://raw.githubusercontent.com/numpy/numpy/main/tools/swig/numpy.i
@ -37,7 +36,7 @@ try:
elif TEST_TREXIO_BACKEND == TREXIO_TEXT: elif TEST_TREXIO_BACKEND == TREXIO_TEXT:
shutil.rmtree(output_filename) shutil.rmtree(output_filename)
except: except:
print (f'Test file {output_filename} does not exist') print ('Nothing to remove.')
#=========================================================# #=========================================================#
#============ WRITE THE DATA IN THE TEST FILE ============# #============ WRITE THE DATA IN THE TEST FILE ============#
@ -50,14 +49,32 @@ nucleus_num = 12
rc = trexio_write_nucleus_num(test_file, nucleus_num) rc = trexio_write_nucleus_num(test_file, nucleus_num)
assert rc==TREXIO_SUCCESS assert rc==TREXIO_SUCCESS
charges = doubleArray(nucleus_num) # initialize charge arrays as a list and convert it to numpy array
for i in range(nucleus_num): charges = [6., 6., 6., 6., 6., 6., 1., 1., 1., 1., 1., 1.]
if i < nucleus_num/2: charges_np = np.array(charges, dtype=np.float64)
charges[i] = 6.
else:
charges[i] = 1.
rc = trexio_write_nucleus_charge(test_file, charges) # function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
rc = trexio_write_safe_nucleus_charge(test_file, charges_np)
assert rc==TREXIO_SUCCESS
# less Python-ic way to read/write arrays using Array classes (probably more portable to other languages)
#charges = doubleArray(nucleus_num)
#for i in range(nucleus_num):
# if i < nucleus_num/2:
# charges[i] = 6.
# else:
# charges[i] = 1.
#rc = trexio_write_nucleus_charge(test_file, charges)
# initialize arrays of nuclear indices as a list and convert it to numpy array
indices = [i for i in range(nucleus_num)]
# type cast is important here because by default numpy transforms a list of integers into int64 array
indices_np = np.array(indices, dtype=np.int32)
# function call below works with both lists and numpy arrays, dimension needed for memory-safety is derived
# from the size of the list/array by SWIG using typemaps from numpy.i
rc = trexio_write_safe_basis_nucleus_index(test_file, indices_np)
assert rc==TREXIO_SUCCESS assert rc==TREXIO_SUCCESS
point_group = 'B3U' point_group = 'B3U'
@ -97,43 +114,43 @@ test_file2 = trexio_open(output_filename, 'r', TEST_TREXIO_BACKEND)
result = trexio_read_nucleus_num(test_file2) result = trexio_read_nucleus_num(test_file2)
assert result[0]==TREXIO_SUCCESS assert result[0]==TREXIO_SUCCESS
assert result[1]==nucleus_num assert result[1]==nucleus_num
#print(result)
charges2 = doubleArray(nucleus_num) # safe call to read_safe array of float values
print(charges2[3]) rc, rcharges_np = trexio_read_safe_nucleus_charge(test_file2, nucleus_num)
#for i in range(nucleus_num):
# charges2[i] = -1.
rc = trexio_read_nucleus_charge(test_file2, charges2)
assert rc==TREXIO_SUCCESS
for i in range(nucleus_num):
assert charges2[i]==charges[i]
#charge_numpy = np.zeros(nucleus_num, dtype=np.float64)
#print(charge_numpy)
rc, charge_numpy = trexio_read_safe_nucleus_charge(test_file2, 12)
print(charge_numpy)
print(charge_numpy[11])
assert rc==TREXIO_SUCCESS assert rc==TREXIO_SUCCESS
assert rcharges_np.dtype is np.dtype(np.float64)
np.testing.assert_array_almost_equal(rcharges_np, charges_np, decimal=8)
# unsafe call to read_safe should not only have return code = TREXIO_UNSAFE_ARRAY_DIM # unsafe call to read_safe should not only have return code = TREXIO_UNSAFE_ARRAY_DIM
# but also should not return numpy array filled with garbage # TODO: it should not return numpy array filled with garbage
rc, charge_numpy = trexio_read_safe_nucleus_charge(test_file2, 12*5) rc, rcharges_fail = trexio_read_safe_nucleus_charge(test_file2, nucleus_num*5)
#print(charge_numpy)
assert rc==TREXIO_UNSAFE_ARRAY_DIM assert rc==TREXIO_UNSAFE_ARRAY_DIM
# less Python-ic way to read/write arrays using Array classes (probably more portable to other languages)
#charges2 = doubleArray(nucleus_num)
#for i in range(nucleus_num):
# charges2[i] = -1.
#rc = trexio_read_nucleus_charge(test_file2, charges2)
#assert rc==TREXIO_SUCCESS
#for i in range(nucleus_num):
# assert charges2[i]==charges[i]
# safe call to read_safe array of int values
rc, rindices_np = trexio_read_safe_basis_nucleus_index(test_file2, nucleus_num)
assert rc==TREXIO_SUCCESS
assert rindices_np.dtype is np.dtype(np.int32)
for i in range(nucleus_num):
assert rindices_np[i]==indices_np[i]
# [WIP]: ideally, the list of strings should be returned as below # [WIP]: ideally, the list of strings should be returned as below
#rc, label_2d = trexio_read_nucleus_label(test_file2, 10) #rc, label_2d = trexio_read_nucleus_label(test_file2, 10)
# [WIP]: currently only low-level routines (return one long string instead of an array of strings) work # [WIP]: currently only low-level routines (return one long string instead of an array of strings) work
rc, labels_1d = trexio_read_nucleus_label_low(test_file2, 10) rc, labels_1d = trexio_read_nucleus_label_low(test_file2, 10)
assert rc==TREXIO_SUCCESS assert rc==TREXIO_SUCCESS
labels_2d = [label for label in labels_1d.split(TREXIO_DELIM) if label] labels_2d = [label for label in labels_1d.split(TREXIO_DELIM) if label]
print(labels_2d) print('Read and parsed nuclear labels:\n', labels_2d)
for i in range(nucleus_num): for i in range(nucleus_num):
assert labels_2d[i]==labels[i] assert labels_2d[i]==labels[i]

View File

@ -10,32 +10,37 @@
/* Include stdint to recognize types from stdint.h */ /* Include stdint to recognize types from stdint.h */
%include <stdint.i> %include <stdint.i>
/* Include carrays to work with C pointers to arrays */
/* NOTE:
carrays was useful before numpy.i was introduced.
For Python interface it's better to use numpy arrays instead of carrays, because the latter are less python-ic.
On the other hand, carrays might be more portable to other target languages.
// Include carrays to work with C pointers to arrays
%include "carrays.i" %include "carrays.i"
/* Include classes that correspond to integer and float arrays */ // Include classes that correspond to integer and float arrays
%array_class(double, doubleArray); %array_class(double, doubleArray);
%array_class(float, floatArray); %array_class(float, floatArray);
%array_class(int32_t, int32Array); %array_class(int32_t, int32Array);
%array_class(int64_t, int64Array); %array_class(int64_t, int64Array);
*/
/* Include typemaps to play with input/output re-casting /* Include typemaps to play with input/output re-casting
Useful when working with C pointers Useful when working with C pointers
*/ */
%include typemaps.i %include typemaps.i
/* Redefine the int32_t* and int64_t* num to be output /* Redefine the int32_t* and int64_t* num to be output
Useful for TREXIO read_num functions where the Useful for TREXIO read_num functions where the
num variable is modified by address num variable is modified by address
*/ */
%apply int *OUTPUT { int32_t* const num}; %apply int *OUTPUT { int32_t* const num};
%apply int *OUTPUT { int64_t* const num}; %apply int *OUTPUT { int64_t* const num};
/* Does not work for arrays (SIGSEGV) /* Does not work for arrays (SIGSEGV) */
%apply double *OUTPUT { double* const dataset };
*/
/* This enables access to trexio_[...]_read_dset_str_low set of functions /* This enables access to trexio_[...]_read_dset_str_low set of functions
in order to return one long string with TREXIO_DELIM delimiter as 2-nd argument of output tuple in order to return one long string with TREXIO_DELIM delimiter as 2-nd argument of output tuple
*/ */
%include <cstring.i> %include <cstring.i>
%cstring_bounded_output(char* dset_out, 1024); %cstring_bounded_output(char* dset_out, 4096);
/* [WIP] TREXIO back ends and exit codes can be redefined in the SWIG target language /* [WIP] TREXIO back ends and exit codes can be redefined in the SWIG target language
using %ignore and further #define statements (instead of disabling the type cast in the trexio.h file) using %ignore and further #define statements (instead of disabling the type cast in the trexio.h file)
@ -47,11 +52,11 @@
#define TREXIO_TEXT 0 #define TREXIO_TEXT 0
*/ */
/* This is an attempt to make SWIG treat double * dset_out, const uint64_t dim_out pattern /* This is an attempt to make SWIG treat double * dset_out|_in, int64_t dim_out|_in pattern
as a special case in order to return the NumPy array to Python from C pointer to array as a special case in order to return the NumPy array to Python from C pointer to array
provided by trexio_read_safe_[dset_num] function. provided by trexio_read_safe_[dset_num] function.
NOTE: numpy.i is currently not part of SWIG but included in the numpy distribution (under numpy/tools/swig/numpy.i) NOTE: numpy.i is currently not part of SWIG but included in the numpy distribution (under numpy/tools/swig/numpy.i)
This means that the interface file has to be provided to SWIG upon compilation either by This means that the interface file has to be provided to SWIG during compilation either by
copying it to the local working directory or by providing -l/path/to/numpy.i flag upon SWIG compilation copying it to the local working directory or by providing -l/path/to/numpy.i flag upon SWIG compilation
*/ */
%include "numpy.i" %include "numpy.i"
@ -60,7 +65,12 @@
import_array(); import_array();
%} %}
/* Enable write|read_safe functions to convert numpy arrays from/to double arrays */
%apply (double* ARGOUT_ARRAY1, int DIM1) {(double * const dset_out, const int64_t dim_out)}; %apply (double* ARGOUT_ARRAY1, int DIM1) {(double * const dset_out, const int64_t dim_out)};
%apply (double* IN_ARRAY1, int DIM1) {(const double * dset_in, const int64_t dim_in)};
/* Enable write|read_safe functions to convert numpy arrays from/to int32 arrays */
%apply (int* ARGOUT_ARRAY1, int DIM1) {(int32_t * const dset_out, const int64_t dim_out)};
%apply (int* IN_ARRAY1, int DIM1) {(const int32_t * dset_in, const int64_t dim_in)};
/* This tells SWIG to treat char ** dset_in pattern as a special case /* This tells SWIG to treat char ** dset_in pattern as a special case
Enables access to trexio_[...]_write_dset_str set of functions directly, i.e. Enables access to trexio_[...]_write_dset_str set of functions directly, i.e.
@ -93,47 +103,7 @@ import_array();
free((char *) $1); free((char *) $1);
} }
/* [WIP] This is an attempt to make SWIG treat char ** dset_out as a special case
In order to return list of string to Python from C-native char ** dset_out,
which is modified (but not allocated) within the trexio_[...]_read_dset_str function
*/
%typemap(in, numinputs=0) char ** dset_out (char * temp) {
/*temp = (char *) malloc(1028*sizeof(char));*/
$1 = &temp;
}
%typemap(argout) char ** dset_out {
Py_ssize_t i = 0;
Py_ssize_t mysize = 12;
PyObject *o_res = PyList_New(mysize);
PyObject *o;
for (i = 0; i < mysize; i++) {
//printf("%s\n", $1[i]);
o = PyString_FromString($1[i]);
PyList_SetItem(o_res, i, o);
}
PyObject *o2, *o3;
if ((!$result) || ($result == Py_None)) {
$result = o_res;
} else {
if (!PyTuple_Check($result)) {
PyObject *o2 = $result;
$result = PyTuple_New(1);
PyTuple_SetItem($result, 0, o2);
}
o3 = PyTuple_New(1);
PyTuple_SetItem(o3, 0, o_res);
o2 = $result;
$result = PySequence_Concat(o2, o3);
Py_DECREF(o2);
Py_DECREF(o3);
Py_DECREF(o_res);
}
}
/* Parse the header files to generate wrappers */ /* Parse the header files to generate wrappers */
%include "trexio_s.h" %include "trexio_s.h"
%include "trexio.h" %include "trexio.h"