Skip to content

Commit

Permalink
Merge pull request #16 from libAtoms/error_messages
Browse files Browse the repository at this point in the history
WIP on moving all errors from stderr to returned error message string
  • Loading branch information
jameskermode authored Sep 26, 2023
2 parents c9de992 + ecb81a5 commit 3c7f293
Show file tree
Hide file tree
Showing 8 changed files with 99 additions and 66 deletions.
27 changes: 0 additions & 27 deletions .cirrus.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,33 +15,6 @@ build_and_store_wheels: &BUILD_AND_STORE_WHEELS
wheels_artifacts:
path: "wheelhouse/*"


######################################################################
# Build linux_aarch64 natively
######################################################################

cirrus_wheels_linux_aarch64_task:
compute_engine_instance:
image_project: cirrus-images
image: family/docker-builder-arm64
architecture: arm64
platform: linux
cpu: 4
memory: 8G
matrix:
- env:
CIBW_BUILD: cp38-* cp39-*
- env:
CIBW_BUILD: cp310-* cp311-*
build_script: |
apt install -y python3-venv python-is-python3
which python
echo $CIRRUS_CHANGE_MESSAGE
# needed for submodules
git submodule update --init
<<: *BUILD_AND_STORE_WHEELS


######################################################################
# Build macosx_arm64 natively
######################################################################
Expand Down
55 changes: 26 additions & 29 deletions libextxyz/extxyz.c
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ void unquote(char *str) {
str[output_len] = 0;
}

int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_kv_pair, int *in_old_one_d) {
int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_kv_pair, int *in_old_one_d, char *error_message) {
//DEBUG printf("enter parse_tree in_kv_pair %d\n", *in_kv_pair); //DEBUG
//DEBUG if (node->cl_obj) { //DEBUG
//DEBUG printf("node type %d gid %d", node->cl_obj->tp, node->cl_obj->gid); //DEBUG
Expand Down Expand Up @@ -161,7 +161,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
} else {
// ignore blank regex, they show up sometimes e.g. after end of sequence
if (strlen(str) > 0) {
fprintf(stderr, "Failed to parse some regex as data key '%s' str '%s'\n",
sprintf(error_message, "Failed to parse some regex as data key '%s' str '%s'\n",
(*cur_entry)->key, str);
// free before incomplete return
free(str);
Expand All @@ -181,7 +181,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
// allocate string for printing
char * str = (char *) malloc((node->len+1)*sizeof(char));
strncpy(str, node->str, node->len);
fprintf(stderr, "Failed to parse some keyword as data, key '%s' str '%s'\n", (*cur_entry)->key, str);
sprintf(error_message, "Failed to parse some keyword as data, key '%s' str '%s'\n", (*cur_entry)->key, str);
free(str);
return 1;
/*
Expand Down Expand Up @@ -237,7 +237,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
//DEBUG printf("looping over children\n"); //DEBUG
for (cleri_children_t *child = node->children; child; child = child->next) {
//DEBUG printf("child\n"); //DEBUG
int err = parse_tree(child->node, cur_entry, in_seq, in_kv_pair, in_old_one_d);
int err = parse_tree(child->node, cur_entry, in_seq, in_kv_pair, in_old_one_d, error_message);
if (err) {
return err;
}
Expand All @@ -254,7 +254,7 @@ int parse_tree(cleri_node_t *node, DictEntry **cur_entry, int *in_seq, int *in_k
// leaving a row in a nested list
if ((*cur_entry)->ncols > 0 && (*cur_entry)->ncols != (*cur_entry)->n_in_row) {
// not first row, check for consistency
fprintf(stderr, "key %s nested list row %d number of entries in row %d inconsistent with prev %d\n",
sprintf(error_message, "key %s nested list row %d number of entries in row %d inconsistent with prev %d\n",
(*cur_entry)->key, (*cur_entry)->nrows+1, (*cur_entry)->n_in_row, (*cur_entry)->ncols);
return 1;
}
Expand Down Expand Up @@ -355,7 +355,7 @@ void free_DataLinkedList(DataLinkedList *list, enum data_type data_t, int free_s
}


int DataLinkedList_to_data(DictEntry *dict) {
int DataLinkedList_to_data(DictEntry *dict, char *error_message) {
int stat=0;

for (DictEntry *entry = dict; entry; entry = entry->next) {
Expand All @@ -377,7 +377,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
if (data_t != data_i && data_t != data_f) {
// prev data is not a number, fail
if (!stat) {
fprintf(stderr, "ERROR: in an array got a number type %d after a non-number %d\n",
sprintf(error_message, "ERROR: in an array got a number type %d after a non-number %d\n",
data_item->data_t, data_t);
}
stat=1;
Expand All @@ -388,7 +388,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
}
} else if (data_item->data_t != data_t) {
if (!stat) {
fprintf(stderr, "ERROR: in an array got a change in type from %d to %dthat cannot be promoted\n",
sprintf(error_message, "ERROR: in an array got a change in type from %d to %d that cannot be promoted\n",
data_t, data_item->data_t);
}
stat=1;
Expand Down Expand Up @@ -446,7 +446,7 @@ int DataLinkedList_to_data(DictEntry *dict) {
}


void *tree_to_dict(cleri_parse_t *tree) {
void *tree_to_dict(cleri_parse_t *tree, char *error_message) {
//DEBUG dump_tree(tree->tree, ""); //DEBUG
// printf("END DUMP\n");

Expand All @@ -458,17 +458,14 @@ void *tree_to_dict(cleri_parse_t *tree) {

int in_seq = 0, in_kv_pair = 0, in_old_one_d = 0;
int err;
err = parse_tree(tree->tree, &cur_entry, &in_seq, &in_kv_pair, &in_old_one_d);
err = parse_tree(tree->tree, &cur_entry, &in_seq, &in_kv_pair, &in_old_one_d, error_message);
if (err) {
fprintf(stderr, "error parsing tree\n");
sprintf(error_message, "error parsing tree\n");
return 0;
}

err = DataLinkedList_to_data(dict);
if (err) {
fprintf(stderr, "ERROR converting data linked list to data arrays, probably inconsistent data types\n");
return 0;
}
err = DataLinkedList_to_data(dict, error_message);
if (err) return 0;

return dict;
}
Expand Down Expand Up @@ -568,7 +565,7 @@ char *read_line(char **line, unsigned long *line_len, FILE *fp) {
return *line;
}

int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment) {
int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment, char *error_message) {
char *line;
unsigned long line_len;
unsigned long line_len_init = 1024;
Expand All @@ -592,7 +589,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
}
int nat_stat = sscanf(line, "%d", nat);
if (nat_stat != 1) {
fprintf(stderr, "Failed to parse int natoms from '%s'\n", line);
sprintf(error_message, "Failed to parse int natoms from '%s'", line);
free(line);
return 0;
}
Expand All @@ -611,15 +608,15 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
tree = cleri_parse(kv_grammar, line);
}
if (! tree->is_valid) {
fprintf(stderr, "Failed to parse string at pos %zd\n", tree->pos);
sprintf(error_message, "Failed to parse string at pos %zd", tree->pos);
cleri_parse_free(tree);
free(line);
return 0;
}
*info = tree_to_dict(tree);
*info = tree_to_dict(tree, error_message);
cleri_parse_free(tree);
if (! info) {
fprintf(stderr, "Failed to convert tree to dict\n");
sprintf(error_message, "Failed to convert tree to dict");
free(line);
return 0;
}
Expand Down Expand Up @@ -696,7 +693,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
// advance to col type
pf = strtok(NULL, ":");
if (strlen(pf) != 1) {
fprintf(stderr, "Failed to parse property type '%s' for property '%s' (# %d)\n", pf, cur_array->key, prop_i);
sprintf(error_message, "Failed to parse property type '%s' for property '%s' (# %d)", pf, cur_array->key, prop_i);
free(props);
free(line); free(re_str);
return 0;
Expand All @@ -708,7 +705,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
int col_num;
int col_num_stat = sscanf(pf, "%d", &col_num);
if (col_num_stat != 1) {
fprintf(stderr, "Failed to parse int property ncolumns from '%s' for property '%s' (# %d)\n", pf, cur_array->key, prop_i);
sprintf(error_message, "Failed to parse int property ncolumns from '%s' for property '%s' (# %d)", pf, cur_array->key, prop_i);
free(props);
free(line); free(re_str);
return 0;
Expand Down Expand Up @@ -741,7 +738,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
this_re = SIMPLESTRING_RE; // "\\S+";
break;
default:
fprintf(stderr, "Unknown property type '%c' for property key '%s' (# %d)\n", col_type, cur_array->key, prop_i);
sprintf(error_message, "Unknown property type '%c' for property key '%s' (# %d)", col_type, cur_array->key, prop_i);
// free incomplete data before returning
free(cur_array->data);
cur_array->data = 0;
Expand Down Expand Up @@ -778,7 +775,7 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
pcre2_code *re = pcre2_compile((unsigned char *)re_str, PCRE2_ZERO_TERMINATED, 0, &pcre2_error, &erroffset, NULL);
if (re == NULL) {
pcre2_get_error_message(pcre2_error, (unsigned char *)line, line_len);
fprintf(stderr, "ERROR %s compiling pcre pattern for atoms lines offset %ld re '%s'\n", line, erroffset, re_str);
sprintf(error_message, "ERROR %s compiling pcre pattern for atoms lines offset %ld re '%s'", line, erroffset, re_str);
free(line); free(re_str);
return 0;
}
Expand All @@ -800,14 +797,14 @@ int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **
if (rc != tot_col_num+1) {
if (rc < 0) {
if (rc == PCRE2_ERROR_NOMATCH) {
fprintf(stderr, "ERROR: pcre2 regexp got NOMATCH on atom line %d\n", li);
sprintf(error_message, "ERROR: pcre2 regexp got NOMATCH on atom line %d", li);
} else {
fprintf(stderr, "ERROR: pcre2 regexp got error %d on atom line %d\n", rc, li);
sprintf(error_message, "ERROR: pcre2 regexp got error %d on atom line %d", rc, li);
}
} else if (rc == 0) {
fprintf(stderr, "ERROR: pcre2 regexp got match_data not big enough (should never happen) on atom line %d\n", li);
sprintf(error_message, "ERROR: pcre2 regexp got match_data not big enough (should never happen) on atom line %d", li);
} else {
fprintf(stderr, "ERROR: pcre2 regexp failed on atom line %d at group %d\n", li, rc-1);
sprintf(error_message, "ERROR: pcre2 regexp failed on atom line %d at group %d", li, rc-1);
}
pcre2_match_data_free(match_data); pcre2_code_free(re);
free(line); free(re_str);
Expand Down
2 changes: 1 addition & 1 deletion libextxyz/extxyz.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,6 @@ typedef struct dict_entry_struct {

void print_dict(DictEntry *dict);
void free_dict(DictEntry *dict);
int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment);
int extxyz_read_ll(cleri_grammar_t *kv_grammar, FILE *fp, int *nat, DictEntry **info, DictEntry **arrays, char *comment, char *error_message);
int extxyz_write_ll(FILE *fp, int nat, DictEntry *info, DictEntry *arrays);
void* extxyz_malloc(size_t nbytes);
11 changes: 9 additions & 2 deletions libextxyz/fextxyz.f90
Original file line number Diff line number Diff line change
Expand Up @@ -62,13 +62,14 @@ function extxyz_malloc(nbytes) bind(c) result(buffer)
type(C_PTR) :: buffer
end

function extxyz_read_ll(kv_grammar, fp, nat, info, arrays, comment) bind(c)
function extxyz_read_ll(kv_grammar, fp, nat, info, arrays, comment, error_message) bind(c)
use iso_c_binding
integer(kind=C_INT) :: extxyz_read_ll
type(C_PTR), value :: kv_grammar, fp
integer(kind=C_INT) :: nat
type(C_PTR) :: info, arrays
type(C_PTR), value :: comment
character(kind=C_CHAR) :: error_message(*)
end function extxyz_read_ll

function extxyz_write_ll(fp, nat, info, arrays) bind(c)
Expand Down Expand Up @@ -431,6 +432,8 @@ function read_extxyz_file(file, at, verbose) result(success)
type(Dictionary) :: f_info, f_arrays
real(C_DOUBLE) :: lattice(3, 3) ! FIXME change C_DOUBLE to DP when this moves to QUIP
integer :: i
character(C_CHAR), dimension(1024), target :: error_message
character(len=1024) :: f_error_message

success = .false.
if (present(verbose)) do_verbose = verbose
Expand All @@ -443,8 +446,12 @@ function read_extxyz_file(file, at, verbose) result(success)
c_info = c_loc(info)
c_arrays = c_loc(arrays)

err = extxyz_read_ll(kv_grammar, file, nat, c_info, c_arrays, C_NULL_PTR)
err = extxyz_read_ll(kv_grammar, file, nat, c_info, c_arrays, C_NULL_PTR, error_message)
success = (err == 1)
if (.not. success) then
call C_string_ptr_to_F_string(c_loc(error_message), f_error_message)
call print('error_message='//f_error_message)
end if

if (do_verbose) then
call print_dict(c_info)
Expand Down
35 changes: 30 additions & 5 deletions python/extxyz/cextxyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ class FILE_ptr(ctypes.c_void_p):
class cleri_grammar_t_ptr(ctypes.c_void_p):
pass

class ExtXYZError(Exception):
pass

DATA_I = 1
DATA_F = 2
DATA_B = 3
Expand Down Expand Up @@ -209,6 +212,20 @@ def cfclose(fp):
fclose.args = [FILE_ptr]
fclose(fp)


def cftell(fp):
ftell = libc.ftell
ftell.argtypes = [FILE_ptr]
ftell.restype = ctypes.c_long
return ftell(fp)


def cfseek(fp, offset, whence):
fseek = libc.fseek
fseek.argtypes = [FILE_ptr, ctypes.c_long, ctypes.c_int]
fseek.restype = ctypes.c_int
return fseek(fp, offset, whence)


def read_frame_dicts(fp, verbose=False, comment=None):
"""Read a single frame using extxyz_read_ll() C function
Expand All @@ -224,31 +241,39 @@ def read_frame_dicts(fp, verbose=False, comment=None):
nat = ctypes.c_int()
info = Dict_entry_ptr()
arrays = Dict_entry_ptr()
eof = False
failure = False

try:
if comment is not None:
comment = comment.encode('utf-8')
else:
comment = ctypes.POINTER(ctypes.c_char)()

error_message = ctypes.create_string_buffer(1024)
if not extxyz.extxyz_read_ll(_kv_grammar,
fp,
ctypes.byref(nat),
ctypes.byref(info),
ctypes.byref(arrays),
comment):
eof = True
raise EOFError()
comment,
error_message):
failure = True
if (error_message.value == b'' or
error_message.value.decode().startswith("Failed to parse int natoms from ' ")):
raise EOFError
else:
raise ExtXYZError(error_message.value.decode().strip().replace('\n', ''))

if verbose:
print('error_message:', error_message.value.decode())
extxyz.print_dict(info)
extxyz.print_dict(arrays)

py_info = c_to_py_dict(info, deepcopy=True)
py_arrays = c_to_py_dict(arrays, deepcopy=True)

finally:
if not eof:
if not failure:
extxyz.free_dict(info)
extxyz.free_dict(arrays)

Expand Down
20 changes: 18 additions & 2 deletions python/extxyz/extxyz.py
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,24 @@ def read_frame(file, verbose=0, use_cextxyz=True,

try:
if use_cextxyz:
natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose,
comment=comment)
try:
fpos = cextxyz.cftell(file)
natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose,
comment=comment)
except cextxyz.ExtXYZError as msg:
error_message, = msg.args
print('cextxyz error message', error_message)
if error_message.startswith('Failed to parse string'):
try:
# rewined then try again, ignoring comment line provided and interpreting as plain XYZ file with species and position columns
cextxyz.cfseek(file, fpos, 0)
natoms, info, arrays = cextxyz.read_frame_dicts(file, verbose=verbose,
comment="Properties=species:S:1:pos:R:3")
except cextxyz.ExtXYZError:
raise
else:
raise

properties = info.pop('Properties', 'species:S:1:pos:R:3')
properties = Properties(property_string=properties)
data = np.zeros(natoms, properties.dtype_vector)
Expand Down
11 changes: 11 additions & 0 deletions tests/test_ase_cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,17 @@ def test_long_header_line(tmp_path):
print("lengths", lengths)
at = read(tmp_path / "long_header.xyz")
assert at.info["test_float_a"] == pytest.approx(test_float)


def test_lammps_file(tmp_path):
with open(tmp_path / "lammps.xyz", "w") as fout:
fout.write("""1
Atoms. Timestep: 1000000
Ar 1.4102613692638457 0.9647607662828660 1.3209769521273491
""")
at = read(tmp_path / "lammps.xyz")
assert len(at) == 1
assert at.positions[0] == pytest.approx([1.4102613692638457, 0.9647607662828660, 1.3209769521273491])

# no constraint I/O support yet
##@pytest.mark.parametrize('constraint', [FixAtoms(indices=(0, 2)),
Expand Down
Loading

0 comments on commit 3c7f293

Please sign in to comment.