Merge pull request #16 from libAtoms/error_messages

WIP on moving all errors from stderr to returned error message string
libAtoms · Sep 26, 2023 · 3c7f293 · 3c7f293
2 parents c9de992 + ecb81a5
commit 3c7f293
Show file tree

Hide file tree

Showing 8 changed files with 99 additions and 66 deletions.
diff --git a/.cirrus.yml b/.cirrus.yml
@@ -15,33 +15,6 @@ build_and_store_wheels: &BUILD_AND_STORE_WHEELS
   wheels_artifacts:
     path: "wheelhouse/*"
 
-
-######################################################################
-# Build linux_aarch64 natively
-######################################################################
-
-cirrus_wheels_linux_aarch64_task:
-  compute_engine_instance:
-    image_project: cirrus-images
-    image: family/docker-builder-arm64
-    architecture: arm64
-    platform: linux
-    cpu: 4
-    memory: 8G
-  matrix:
-    - env:
-        CIBW_BUILD: cp38-* cp39-*
-    - env:
-        CIBW_BUILD: cp310-* cp311-*
-  build_script: |
-    apt install -y python3-venv python-is-python3
-    which python
-    echo $CIRRUS_CHANGE_MESSAGE
-    # needed for submodules
-    git submodule update --init
-  <<: *BUILD_AND_STORE_WHEELS
-
-
 ######################################################################
 # Build macosx_arm64 natively
 ######################################################################

diff --git a/libextxyz/extxyz.c b/libextxyz/extxyz.c
@@ -70,7 +70,7 @@ void unquote(char *str) {
     str[output_len] = 0;
 }
 
-int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_kv_pair, int *in_old_one_d) {
+int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_kv_pair, int *in_old_one_d, char *error_message) {
     //DEBUG printf("enter parse_tree in_kv_pair %d\n", *in_kv_pair); //DEBUG
     //DEBUG if (node->cl_obj) { //DEBUG
         //DEBUG printf("node type %d gid %d", node->cl_obj->tp, node->cl_obj->gid); //DEBUG
@@ -161,7 +161,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
                 } else {
                     // ignore blank regex, they show up sometimes e.g. after end of sequence
                     if (strlen(str) > 0) {
-                        fprintf(stderr, "Failed to parse some regex as data key '%s' str '%s'\n", 
+                        sprintf(error_message, "Failed to parse some regex as data key '%s' str '%s'\n", 
                                 (*cur_entry)->key, str);
                         // free before incomplete return
                         free(str);
@@ -181,7 +181,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
                     // allocate string for printing
                     char * str = (char *) malloc((node->len+1)*sizeof(char));
                     strncpy(str, node->str, node->len);
-                    fprintf(stderr, "Failed to parse some keyword as data, key '%s' str '%s'\n", (*cur_entry)->key, str);
+                    sprintf(error_message, "Failed to parse some keyword as data, key '%s' str '%s'\n", (*cur_entry)->key, str);
                     free(str);
                     return 1;
                 /*
@@ -237,7 +237,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
     //DEBUG printf("looping over children\n"); //DEBUG
     for (cleri_children_t *child = node->children; child; child = child->next) {
         //DEBUG printf("child\n"); //DEBUG
-        int err = parse_tree(child->node, cur_entry, in_seq, in_kv_pair, in_old_one_d);
+        int err = parse_tree(child->node, cur_entry, in_seq, in_kv_pair, in_old_one_d, error_message);
         if (err) {
             return err;
         }
@@ -254,7 +254,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
             // leaving a row in a nested list
             if ((*cur_entry)->ncols > 0 && (*cur_entry)->ncols != (*cur_entry)->n_in_row) {
                 // not first row, check for consistency
-                fprintf(stderr, "key %s nested list row %d number of entries in row %d inconsistent with prev %d\n", 
+                sprintf(error_message, "key %s nested list row %d number of entries in row %d inconsistent with prev %d\n", 
                         (*cur_entry)->key, (*cur_entry)->nrows+1, (*cur_entry)->n_in_row, (*cur_entry)->ncols);
                 return 1;
             }
@@ -355,7 +355,7 @@ void free_DataLinkedList(DataLinkedList *list, enum data_type data_t, int free_s
 }
 
 
-int DataLinkedList_to_data(DictEntry *dict) {
+int DataLinkedList_to_data(DictEntry *dict, char *error_message) {
     int stat=0;
 
     for (DictEntry *entry = dict; entry; entry = entry->next) {
@@ -377,7 +377,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
                 if (data_t != data_i && data_t != data_f) {
                     // prev data is not a number, fail
                     if (!stat) {
-                        fprintf(stderr, "ERROR: in an array got a number type %d after a non-number %d\n",
+                        sprintf(error_message, "ERROR: in an array got a number type %d after a non-number %d\n",
                             data_item->data_t, data_t);
                     }
                     stat=1;
@@ -388,7 +388,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
                 }
             } else if (data_item->data_t != data_t) {
                 if (!stat) {
-                    fprintf(stderr, "ERROR: in an array got a change in type from %d to %dthat cannot be promoted\n",
+                    sprintf(error_message, "ERROR: in an array got a change in type from %d to %d that cannot be promoted\n",
                         data_t, data_item->data_t);
                 }
                 stat=1;
@@ -446,7 +446,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
 }
 
 
-void *tree_to_dict(cleri_parse_t *tree) {
+void *tree_to_dict(cleri_parse_t *tree, char *error_message) {
     //DEBUG dump_tree(tree->tree, ""); //DEBUG
     // printf("END DUMP\n");
 
@@ -458,17 +458,14 @@ void *tree_to_dict(cleri_parse_t *tree) {
 
     int in_seq = 0, in_kv_pair = 0, in_old_one_d = 0;
     int err;
-    err = parse_tree(tree->tree, &cur_entry, &in_seq, &in_kv_pair, &in_old_one_d);
+    err = parse_tree(tree->tree, &cur_entry, &in_seq, &in_kv_pair, &in_old_one_d, error_message);
     if (err) {
-        fprintf(stderr, "error parsing tree\n");
+        sprintf(error_message, "error parsing tree\n");
         return 0;
     }
 
-    err = DataLinkedList_to_data(dict);
-    if (err) {
-        fprintf(stderr, "ERROR converting data linked list to data arrays, probably inconsistent data types\n");
-        return 0;
-    }
+    err = DataLinkedList_to_data(dict, error_message);
+    if (err) return 0;
 
     return dict;
 }
@@ -568,7 +565,7 @@ char *read_line(char **line, unsigned long *line_len, FILE *fp) {
     return *line;
 }
 
-int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment) {
+int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment, char *error_message) {
     char *line;
     unsigned long line_len;
     unsigned long line_len_init = 1024;
@@ -592,7 +589,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
     }
     int nat_stat = sscanf(line, "%d", nat);
     if (nat_stat != 1) {
-        fprintf(stderr, "Failed to parse int natoms from '%s'\n", line);
+        sprintf(error_message, "Failed to parse int natoms from '%s'", line);
         free(line);
         return 0;
     }
@@ -611,15 +608,15 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
         tree = cleri_parse(kv_grammar, line);
     }
     if (! tree->is_valid) {
-        fprintf(stderr, "Failed to parse string at pos %zd\n", tree->pos);
+        sprintf(error_message, "Failed to parse string at pos %zd", tree->pos);
         cleri_parse_free(tree);
         free(line);
         return 0;
     }
-    *info = tree_to_dict(tree);
+    *info = tree_to_dict(tree, error_message);
     cleri_parse_free(tree);
     if (! info) {
-        fprintf(stderr, "Failed to convert tree to dict\n");
+        sprintf(error_message, "Failed to convert tree to dict");
         free(line);
         return 0;
     }
@@ -696,7 +693,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
         // advance to col type
         pf = strtok(NULL, ":");
         if (strlen(pf) != 1) {
-            fprintf(stderr, "Failed to parse property type '%s' for property '%s' (# %d)\n", pf, cur_array->key, prop_i);
+            sprintf(error_message, "Failed to parse property type '%s' for property '%s' (# %d)", pf, cur_array->key, prop_i);
             free(props);
             free(line); free(re_str);
             return 0;
@@ -708,7 +705,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
         int col_num;
         int col_num_stat = sscanf(pf, "%d", &col_num);
         if (col_num_stat != 1) {
-            fprintf(stderr, "Failed to parse int property ncolumns from '%s' for property '%s' (# %d)\n", pf, cur_array->key, prop_i);
+            sprintf(error_message, "Failed to parse int property ncolumns from '%s' for property '%s' (# %d)", pf, cur_array->key, prop_i);
             free(props);
             free(line); free(re_str);
             return 0;
@@ -741,7 +738,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
                 this_re = SIMPLESTRING_RE; // "\\S+";
                 break;
             default:
-                fprintf(stderr, "Unknown property type '%c' for property key '%s' (# %d)\n", col_type, cur_array->key, prop_i);
+                sprintf(error_message, "Unknown property type '%c' for property key '%s' (# %d)", col_type, cur_array->key, prop_i);
                 // free incomplete data before returning
                 free(cur_array->data);
                 cur_array->data = 0;
@@ -778,7 +775,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
     pcre2_code *re = pcre2_compile((unsigned char *)re_str, PCRE2_ZERO_TERMINATED, 0, &pcre2_error, &erroffset, NULL);
     if (re == NULL) {
         pcre2_get_error_message(pcre2_error, (unsigned char *)line, line_len);
-        fprintf(stderr, "ERROR %s compiling pcre pattern for atoms lines offset %ld re '%s'\n", line, erroffset, re_str);
+        sprintf(error_message, "ERROR %s compiling pcre pattern for atoms lines offset %ld re '%s'", line, erroffset, re_str);
         free(line); free(re_str);
         return 0;
     }
@@ -800,14 +797,14 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
         if (rc != tot_col_num+1) {
             if (rc < 0) {
                 if (rc == PCRE2_ERROR_NOMATCH) {
-                    fprintf(stderr, "ERROR: pcre2 regexp got NOMATCH on atom line %d\n", li);
+                    sprintf(error_message, "ERROR: pcre2 regexp got NOMATCH on atom line %d", li);
                 } else {
-                    fprintf(stderr, "ERROR: pcre2 regexp got error %d on atom line %d\n", rc, li);
+                    sprintf(error_message, "ERROR: pcre2 regexp got error %d on atom line %d", rc, li);
                 }
             } else if (rc == 0) {
-                fprintf(stderr, "ERROR: pcre2 regexp got match_data not big enough (should never happen) on atom line %d\n", li);
+                sprintf(error_message, "ERROR: pcre2 regexp got match_data not big enough (should never happen) on atom line %d", li);
             } else {
-                fprintf(stderr, "ERROR: pcre2 regexp failed on atom line %d at group %d\n", li, rc-1);
+                sprintf(error_message, "ERROR: pcre2 regexp failed on atom line %d at group %d", li, rc-1);
             }
             pcre2_match_data_free(match_data); pcre2_code_free(re);
             free(line); free(re_str);

diff --git a/libextxyz/extxyz.h b/libextxyz/extxyz.h
@@ -68,6 +68,6 @@ typedef struct dict_entry_struct {
 
 void print_dict(DictEntry *dict);
 void free_dict(DictEntry *dict);
-int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment);
+int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment, char *error_message);
 int extxyz_write_ll(FILE *fp, int nat, DictEntry *info, DictEntry *arrays);
 void* extxyz_malloc(size_t nbytes);
diff --git a/libextxyz/fextxyz.f90 b/libextxyz/fextxyz.f90
@@ -62,13 +62,14 @@ function extxyz_malloc(nbytes) bind(c) result(buffer)
             type(C_PTR) :: buffer
         end
 
-        function extxyz_read_ll(kv_grammar, fp, nat, info, arrays, comment) bind(c)
+        function extxyz_read_ll(kv_grammar, fp, nat, info, arrays, comment, error_message) bind(c)
             use iso_c_binding
             integer(kind=C_INT) :: extxyz_read_ll
             type(C_PTR), value :: kv_grammar, fp
             integer(kind=C_INT) :: nat
             type(C_PTR) :: info, arrays
             type(C_PTR), value :: comment
+            character(kind=C_CHAR) :: error_message(*)
         end function extxyz_read_ll
 
         function extxyz_write_ll(fp, nat, info, arrays) bind(c)
@@ -431,6 +432,8 @@ function read_extxyz_file(file, at, verbose) result(success)
     type(Dictionary) :: f_info, f_arrays
     real(C_DOUBLE) :: lattice(3, 3) ! FIXME change C_DOUBLE to DP when this moves to QUIP
     integer :: i
+    character(C_CHAR), dimension(1024), target :: error_message
+    character(len=1024) :: f_error_message
 
     success = .false.
     if (present(verbose)) do_verbose = verbose
@@ -443,8 +446,12 @@ function read_extxyz_file(file, at, verbose) result(success)
     c_info = c_loc(info)
     c_arrays = c_loc(arrays)
 
-    err = extxyz_read_ll(kv_grammar, file, nat, c_info, c_arrays, C_NULL_PTR)
+    err = extxyz_read_ll(kv_grammar, file, nat, c_info, c_arrays, C_NULL_PTR, error_message)
     success = (err == 1)
+    if (.not. success) then
+        call C_string_ptr_to_F_string(c_loc(error_message), f_error_message)
+        call print('error_message='//f_error_message)
+    end if
 
     if (do_verbose) then
         call print_dict(c_info)

diff --git a/python/extxyz/cextxyz.py b/python/extxyz/cextxyz.py
@@ -12,6 +12,9 @@ class FILE_ptr(ctypes.c_void_p):
 class cleri_grammar_t_ptr(ctypes.c_void_p):
     pass
 
+class ExtXYZError(Exception):
+    pass
+
 DATA_I = 1
 DATA_F = 2
 DATA_B = 3
@@ -209,6 +212,20 @@ def cfclose(fp):
     fclose.args = [FILE_ptr]
     fclose(fp)
 
+
+def cftell(fp):
+    ftell = libc.ftell
+    ftell.argtypes = [FILE_ptr]
+    ftell.restype = ctypes.c_long
+    return ftell(fp)
+
+
+def cfseek(fp, offset, whence):
+    fseek = libc.fseek
+    fseek.argtypes = [FILE_ptr, ctypes.c_long, ctypes.c_int]
+    fseek.restype = ctypes.c_int
+    return fseek(fp, offset, whence)
+
 
 def read_frame_dicts(fp, verbose=False, comment=None):
     """Read a single frame using extxyz_read_ll() C function
@@ -224,31 +241,39 @@ def read_frame_dicts(fp, verbose=False, comment=None):
     nat = ctypes.c_int()
     info = Dict_entry_ptr()
     arrays = Dict_entry_ptr()
-    eof = False
+    failure = False
 
     try:
         if comment is not None:
             comment = comment.encode('utf-8')
         else:
             comment = ctypes.POINTER(ctypes.c_char)()
+
+        error_message = ctypes.create_string_buffer(1024)            
         if not extxyz.extxyz_read_ll(_kv_grammar,
                                      fp,
                                      ctypes.byref(nat),
                                      ctypes.byref(info),
                                      ctypes.byref(arrays),
-                                     comment):
-            eof = True
-            raise EOFError()
+                                     comment,
+                                     error_message):
+            failure = True
+            if (error_message.value == b'' or 
+                error_message.value.decode().startswith("Failed to parse int natoms from ' ")):
+                raise EOFError
+            else:
+                raise ExtXYZError(error_message.value.decode().strip().replace('\n', ''))
 
         if verbose:
+            print('error_message:', error_message.value.decode())
             extxyz.print_dict(info)
             extxyz.print_dict(arrays)
 
         py_info = c_to_py_dict(info, deepcopy=True)
         py_arrays = c_to_py_dict(arrays, deepcopy=True)
 
     finally:
-        if not eof:
+        if not failure:
             extxyz.free_dict(info)
             extxyz.free_dict(arrays)
 

diff --git a/python/extxyz/extxyz.py b/python/extxyz/extxyz.py
@@ -592,8 +592,24 @@ def read_frame(file, verbose=0, use_cextxyz=True,
 
     try:
         if use_cextxyz:
-            natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose, 
-                                                            comment=comment)        
+            try:
+                fpos = cextxyz.cftell(file)
+                natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose, 
+                                                                comment=comment)
+            except cextxyz.ExtXYZError as msg:
+                error_message,  = msg.args
+                print('cextxyz error message', error_message)
+                if error_message.startswith('Failed to parse string'):
+                    try:
+                        # rewined then try again, ignoring comment line provided and interpreting as plain XYZ file with species and position columns
+                        cextxyz.cfseek(file, fpos, 0)
+                        natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose, 
+                                                                        comment="Properties=species:S:1:pos:R:3")
+                    except cextxyz.ExtXYZError:
+                        raise
+                else:
+                    raise
+
             properties = info.pop('Properties', 'species:S:1:pos:R:3')
             properties = Properties(property_string=properties)
             data = np.zeros(natoms, properties.dtype_vector)

diff --git a/tests/test_ase_cases.py b/tests/test_ase_cases.py
@@ -340,6 +340,17 @@ def test_long_header_line(tmp_path):
     print("lengths", lengths)
     at = read(tmp_path / "long_header.xyz")
     assert at.info["test_float_a"] == pytest.approx(test_float)
+
+
+def test_lammps_file(tmp_path):
+    with open(tmp_path / "lammps.xyz", "w") as fout:
+        fout.write("""1
+Atoms. Timestep: 1000000
+Ar    1.4102613692638457    0.9647607662828660    1.3209769521273491
+""")
+    at = read(tmp_path / "lammps.xyz")
+    assert len(at) == 1
+    assert at.positions[0] == pytest.approx([1.4102613692638457, 0.9647607662828660, 1.3209769521273491])
 
 # no constraint I/O support yet
 ##@pytest.mark.parametrize('constraint', [FixAtoms(indices=(0, 2)),