1
0
mirror of https://github.com/TREX-CoE/trexio.git synced 2024-08-25 06:31:43 +02:00

add pattern matching for arrays of strings + comments

This commit is contained in:
q-posev 2021-07-26 18:49:26 +02:00
parent 12c2a3a014
commit 1f98604d58
3 changed files with 86 additions and 41 deletions

View File

@ -2,6 +2,7 @@
/* Define SWIGWORDSIZE in order to properly align long integers on 64-bit system */
#define SWIGWORDSIZE64
%{
#define SWIG_FILE_WITH_INIT
/* Include the headers in the wrapper code */
#include "trexio_s.h"
#include "trexio.h"
@ -9,7 +10,7 @@
/* Include stdint to recognize types from stdint.h */
%include <stdint.i>
/* Include carrays to work with C-like arrays */
/* Include carrays to work with C pointers to arrays */
%include "carrays.i"
/* Include classes that correspond to integer and float arrays */
%array_class(double, doubleArray);
@ -20,6 +21,7 @@
Useful when working with C pointers
*/
%include typemaps.i
/* Redefine the int32_t* and int64_t* num to be output
Useful for TREXIO read_num functions where the
num variable is modified by address
@ -29,8 +31,14 @@
/* Does not work for arrays (SIGSEGV)
%apply double *OUTPUT { double* const dataset };
*/
/* TREXIO back ends and exit codes can be redefined in the SWIG target language using %ignore and further #define statements
(instead of disabling the type cast in the trexio.h file)
/* This enables access to the trexio_[...]_read_dset_str_low set of functions
in order to return one long string with the TREXIO_DELIM delimiter as the second element of the output tuple
*/
%include <cstring.i>
%cstring_bounded_output(char* dset_out, 1024);
/* [WIP] TREXIO back ends and exit codes can be redefined in the SWIG target language
using %ignore and further #define statements (instead of disabling the type cast in the trexio.h file)
*/
/*
%ignore TREXIO_HDF5; // Ignore a macro in the header file
@ -38,8 +46,11 @@
#define TREXIO_HDF5 0
#define TREXIO_TEXT 0
*/
/* This tells SWIG to treat char ** as a special case */
%typemap(in) char ** {
/* This tells SWIG to treat char ** dset_in pattern as a special case
Enables access to trexio_[...]_write_dset_str set of functions directly, i.e.
by converting input list of strings from Python into char ** of C
*/
%typemap(in) char ** dset_in {
/* Check if is a list */
if (PyList_Check($input)) {
int size = PyList_Size($input);
@ -62,20 +73,51 @@
}
}
/* This cleans up the char ** array we malloc-ed before */
%typemap(freearg) char ** {
%typemap(freearg) char ** dset_in {
free((char *) $1);
}
/* Now a test function
%inline %{
int print_args(char ** argv) {
int i = 0;
while (argv[i]) {
printf("argv[%d] = %s\n", i, argv[i]);
i++;
}
return i;
/* [WIP] This is an attempt to make SWIG treat char ** dset_out as a special case
in order to return a list of strings to Python from the C-native char ** dset_out,
which is modified (but not allocated) within the trexio_[...]_read_dset_str function
*/
/* Input typemap for the `char ** dset_out` pattern.
   numinputs=0 removes the argument from the target-language call signature;
   the wrapped C function receives the address of the local `char *` temporary instead.
   NOTE(review): `temp` is never initialized or allocated here (the malloc below is
   commented out), so it does not point to valid storage when the wrapped function
   runs - looks unfinished, confirm before relying on this typemap. */
%typemap(in, numinputs=0) char ** dset_out (char * temp) {
/*temp = (char *) malloc(1028*sizeof(char));*/
$1 = &temp;
}
%}*/
/* Argout typemap for the `char ** dset_out` pattern.
   Builds a Python list of strings from the C array and appends it to the wrapper result:
     - if there is no result yet (NULL or None), the list becomes the result;
     - otherwise the current result is promoted to a 1-tuple (if it is not a tuple already)
       and the list is appended as the last tuple element.
   [WIP] The number of strings is hard-coded to 12.
   NOTE(review): presumably this should be the dataset dimension - confirm before use.
*/
%typemap(argout) char ** dset_out {
  Py_ssize_t i = 0;
  Py_ssize_t mysize = 12;
  PyObject *o_res = PyList_New(mysize);
  PyObject *o;
  PyObject *o2, *o3;

  if (o_res == NULL) SWIG_fail;

  for (i = 0; i < mysize; i++) {
    o = PyString_FromString($1[i]);
    if (o == NULL) {
      /* Release the partially filled list before reporting the failure */
      Py_DECREF(o_res);
      SWIG_fail;
    }
    /* PyList_SetItem steals the reference to o */
    PyList_SetItem(o_res, i, o);
  }

  if ((!$result) || ($result == Py_None)) {
    /* Drop the reference to None (if any) that is being replaced */
    Py_XDECREF($result);
    $result = o_res;
  } else {
    if (!PyTuple_Check($result)) {
      /* Promote a scalar result to a 1-tuple; PyTuple_SetItem steals the reference */
      o2 = $result;
      $result = PyTuple_New(1);
      PyTuple_SetItem($result, 0, o2);
    }
    o3 = PyTuple_New(1);
    /* o3 takes over the only reference to o_res (stolen by PyTuple_SetItem) */
    PyTuple_SetItem(o3, 0, o_res);
    o2 = $result;
    /* The concatenated tuple holds its own new references to all items */
    $result = PySequence_Concat(o2, o3);
    Py_DECREF(o2);
    /* Releasing o3 also releases its reference to o_res; o_res must NOT be
       decremented again here, otherwise the list would be freed while
       still being referenced from $result */
    Py_DECREF(o3);
  }
}
/* Parse the header files to generate wrappers */
%include "trexio_s.h"
%include "trexio.h"

View File

@ -1643,19 +1643,19 @@ trexio_read_chunk_ao_2e_int_eri_value_64(trexio_t* const file,
#+begin_src c :tangle hrw_dset_str_front.h :exports none
trexio_exit_code trexio_has_$group_dset$(trexio_t* const file);
trexio_exit_code trexio_read_$group_dset$_low(trexio_t* const file, char* dset, const uint32_t max_str_len);
trexio_exit_code trexio_write_$group_dset$_low(trexio_t* const file, const char* dset, const uint32_t max_str_len);
trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, char** dset, const uint32_t max_str_len);
trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const char** dset, const uint32_t max_str_len);
trexio_exit_code trexio_read_$group_dset$_low(trexio_t* const file, char* dset_out, const uint32_t max_str_len);
trexio_exit_code trexio_write_$group_dset$_low(trexio_t* const file, const char* dset_in, const uint32_t max_str_len);
trexio_exit_code trexio_read_$group_dset$(trexio_t* const file, char** dset_out, const uint32_t max_str_len);
trexio_exit_code trexio_write_$group_dset$(trexio_t* const file, const char** dset_in, const uint32_t max_str_len);
#+end_src
#+begin_src c :tangle read_dset_str_front.c
trexio_exit_code
trexio_read_$group_dset$_low (trexio_t* const file, char* dset, const uint32_t max_str_len)
trexio_read_$group_dset$_low (trexio_t* const file, char* dset_out, const uint32_t max_str_len)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset == NULL) return TREXIO_INVALID_ARG_2;
if (dset_out == NULL) return TREXIO_INVALID_ARG_2;
if (max_str_len <= 0) return TREXIO_INVALID_ARG_3;
trexio_exit_code rc;
@ -1673,15 +1673,15 @@ trexio_read_$group_dset$_low (trexio_t* const file, char* dset, const uint32_t m
switch (file->back_end) {
case TREXIO_TEXT:
return trexio_text_read_$group_dset$(file, dset, rank, dims, max_str_len);
return trexio_text_read_$group_dset$(file, dset_out, rank, dims, max_str_len);
break;
case TREXIO_HDF5:
return trexio_hdf5_read_$group_dset$(file, dset, rank, dims, max_str_len);
return trexio_hdf5_read_$group_dset$(file, dset_out, rank, dims, max_str_len);
break;
/*
case TREXIO_JSON:
rc = trexio_json_read_$group_dset$(file, dset, rank, dims);
rc = trexio_json_read_$group_dset$(file, dset_out, rank, dims);
break;
,*/
}
@ -1690,11 +1690,11 @@ trexio_read_$group_dset$_low (trexio_t* const file, char* dset, const uint32_t m
}
trexio_exit_code
trexio_read_$group_dset$ (trexio_t* const file, char** dset, const uint32_t max_str_len)
trexio_read_$group_dset$ (trexio_t* const file, char** dset_out, const uint32_t max_str_len)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset == NULL) return TREXIO_INVALID_ARG_2;
if (dset_out == NULL) return TREXIO_INVALID_ARG_2;
if (max_str_len <= 0) return TREXIO_INVALID_ARG_3;
assert(file->back_end < TREXIO_INVALID_BACK_END);
@ -1725,8 +1725,8 @@ trexio_read_$group_dset$ (trexio_t* const file, char** dset, const uint32_t max_
return TREXIO_FAILURE;
}
strcpy(dset[i], "");
strcat(dset[i], pch);
strcpy(dset_out[i], "");
strcat(dset_out[i], pch);
}
@ -1738,11 +1738,11 @@ trexio_read_$group_dset$ (trexio_t* const file, char** dset, const uint32_t max_
#+begin_src c :tangle write_dset_str_front.c
trexio_exit_code
trexio_write_$group_dset$_low (trexio_t* const file, const char* dset, const uint32_t max_str_len)
trexio_write_$group_dset$_low (trexio_t* const file, const char* dset_in, const uint32_t max_str_len)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset == NULL) return TREXIO_INVALID_ARG_2;
if (dset_in == NULL) return TREXIO_INVALID_ARG_2;
if (max_str_len <= 0) return TREXIO_INVALID_ARG_3;
if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS;
@ -1769,7 +1769,7 @@ trexio_write_$group_dset$_low (trexio_t* const file, const char* dset, const uin
/* parse the string using strtok */
for(uint64_t i=0; i<dims[0]; i++) {
pch = i == 0 ? strtok( (char*) dset, TREXIO_DELIM) : strtok(NULL, TREXIO_DELIM) ;
pch = i == 0 ? strtok( (char*) dset_in, TREXIO_DELIM) : strtok(NULL, TREXIO_DELIM) ;
if (pch == NULL) {
FREE(dset_str[0]);
@ -1815,11 +1815,11 @@ trexio_write_$group_dset$_low (trexio_t* const file, const char* dset, const uin
}
trexio_exit_code
trexio_write_$group_dset$ (trexio_t* const file, const char** dset, const uint32_t max_str_len)
trexio_write_$group_dset$ (trexio_t* const file, const char** dset_in, const uint32_t max_str_len)
{
if (file == NULL) return TREXIO_INVALID_ARG_1;
if (dset == NULL) return TREXIO_INVALID_ARG_2;
if (dset_in == NULL) return TREXIO_INVALID_ARG_2;
if (max_str_len <= 0) return TREXIO_INVALID_ARG_3;
if (trexio_has_$group_dset$(file) == TREXIO_SUCCESS) return TREXIO_DSET_ALREADY_EXISTS;
@ -1838,7 +1838,7 @@ trexio_write_$group_dset$ (trexio_t* const file, const char** dset, const uint32
strcpy(str_compiled, "");
for (uint64_t i=0; i < dset_dim; i++) {
strcat(str_compiled, dset[i]);
strcat(str_compiled, dset_in[i]);
strcat(str_compiled, TREXIO_DELIM);
}

View File

@ -27,7 +27,7 @@ try:
if TEST_TREXIO_BACKEND == 1:
shutil.rmtree(output_filename)
except:
print (f'Test file {OUTPUT_FILENAME} does not exist')
print (f'Test file {output_filename} does not exist')
#=========================================================#
#============ WRITE THE DATA IN THE TEST FILE ============#
@ -95,13 +95,16 @@ assert rc==0
for i in range(nucleus_num):
assert charges2[i]==charges[i]
labels2 = ['' for i in range(nucleus_num)]
print(labels2)
rc = trexio_read_nucleus_label(test_file2, labels2, 10)
print(labels2)
# [WIP]: ideally, the list of strings should be returned as below
#rc, label_2d = trexio_read_nucleus_label(test_file2, 10)
# [WIP]: currently only low-level routines (return one long string instead of an array of strings) work
rc, labels_1d = trexio_read_nucleus_label_low(test_file2, 10)
assert rc==0
labels_2d = [label for label in labels_1d.split('\n') if label]
print(labels_2d)
for i in range(nucleus_num):
assert labels2[i]==labels[i]
assert labels_2d[i]==labels[i]
rc = trexio_close(test_file2)
assert rc==0