Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Message fixes for the C code #6504

Merged
merged 20 commits into from
Sep 20, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions src/fmelt.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,8 @@ SEXP whichwrapper(SEXP x, SEXP val) {

static const char *concat(SEXP vec, SEXP idx) {
if (!isString(vec)) error(_("concat: 'vec' must be a character vector"));
if (!isInteger(idx) || length(idx) < 0) error(_("concat: 'idx' must be an integer vector of length >= 0"));
if (!isInteger(idx))
error(_("concat: 'idx' must be an integer vector"));

static char ans[1024]; // so only one call to concat() per calling warning/error
int nidx=length(idx), nvec=length(vec);
Expand Down Expand Up @@ -802,7 +803,8 @@ SEXP fmelt(SEXP DT, SEXP id, SEXP measure, SEXP varfactor, SEXP valfactor, SEXP
}
int protecti=0;
dtnames = PROTECT(getAttrib(DT, R_NamesSymbol)); protecti++;
if (isNull(dtnames)) error(_("names(data) is NULL. Please report to data.table-help"));
if (isNull(dtnames))
internal_error(__func__, "names(data) is NULL");
if (LOGICAL(narmArg)[0] == TRUE) narm = TRUE;
if (LOGICAL(verboseArg)[0] == TRUE) verbose = TRUE;
struct processData data;
Expand Down
26 changes: 17 additions & 9 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1888,11 +1888,14 @@ int freadMain(freadMainArgs _args) {
// *2 to get a good spacing. We don't want overlaps resulting in double counting.
}
if (verbose) {
DTPRINT(_(" Number of sampling jump points = %d because "), nJumps);
if (nrowLimit<INT64_MAX) DTPRINT(_("nrow limit (%"PRIu64") supplied\n"), (uint64_t)nrowLimit);
else if (jump0size==0) DTPRINT(_("jump0size==0\n"));
else DTPRINT(_("(%"PRIu64" bytes from row 1 to eof) / (2 * %"PRIu64" jump0size) == %"PRIu64"\n"),
(uint64_t)sz, (uint64_t)jump0size, (uint64_t)(sz/(2*jump0size)));
if (nrowLimit<INT64_MAX) {
DTPRINT(_(" Number of sampling jump points = %d because nrow limit (%"PRIu64") supplied\n"), nJumps, (uint64_t)nrowLimit);
} else if (jump0size==0) {
DTPRINT(_(" Number of sampling jump points = %d because jump0size==0\n"), nJumps);
} else {
DTPRINT(_(" Number of sampling jump points = %d because (%"PRIu64" bytes from row 1 to eof) / (2 * %"PRIu64" jump0size) == %"PRIu64"\n"),
nJumps, (uint64_t)sz, (uint64_t)jump0size, (uint64_t)(sz/(2*jump0size)));
}
}
nJumps++; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
if (nrowLimit<INT64_MAX && nrowLimit>0) nJumps=1; // when nrows>0 supplied by user, no jumps (not even at the end) and single threaded
Expand Down Expand Up @@ -1930,8 +1933,10 @@ int freadMain(freadMainArgs _args) {
}
if ( (thisNcol<ncol && ncol>1 && !fill) ||
(!eol(&ch) && ch!=eof) ) {
if (verbose) DTPRINT(_(" A line with too-%s fields (%d/%d) was found on line %d of sample jump %d. %s\n"),
thisNcol<ncol ? _("few") : _("many"), thisNcol, ncol, jumpLine, jump, jump>0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
if (verbose)
DTPRINT(thisNcol<ncol ? _(" A line with too-few fields (%d/%d) was found on line %d of sample jump %d. %s\n")
: _(" A line with too-many fields (%d/%d) was found on line %d of sample jump %d. %s\n"),
thisNcol, ncol, jumpLine, jump, jump>0 ? _("Most likely this jump landed awkwardly so type bumps here will be skipped.") : "");
bumped = false;
if (jump==0) lastRowEnd=eof; // to prevent the end from being tested; e.g. a short file with blank line within first 100 like test 976
break;
Expand Down Expand Up @@ -2035,7 +2040,10 @@ int freadMain(freadMainArgs _args) {
}
if (verbose) {
if (sampleLines==0) {
DTPRINT(_(" 'header' determined to be %s because there are%s number fields in the first and only row\n"), args.header?"TRUE":"FALSE", args.header?_(" no"):"");
if (args.header)
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
DTPRINT(_(" 'header' determined to be TRUE because there are no number fields in the first and only row\n"));
else
DTPRINT(_(" 'header' determined to be FALSE because there are number fields in the first and only row\n"));
MichaelChirico marked this conversation as resolved.
Show resolved Hide resolved
} else {
if (args.header)
DTPRINT(_(" 'header' determined to be true because all columns are type string and a better guess is not possible\n"));
Expand Down Expand Up @@ -2621,7 +2629,7 @@ int freadMain(freadMainArgs _args) {

if (stopTeam) {
if (internalErr[0]!='\0') {
STOP("%s %s: %s. %s", _("Internal error in"), __func__, internalErr, _("Please report to the data.table issues tracker")); // # nocov
STOP(_("Internal error in %s: %s. Please report to the data.table issues tracker"), __func__, internalErr); // # nocov
}
stopTeam = false;

Expand Down
16 changes: 11 additions & 5 deletions src/fsort.c
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,16 @@ int qsort_cmp(const void *a, const void *b) {
return (x<y)-(x>y); // largest first in a safe branchless way casting long to int
}

static size_t shrinkMSB(size_t MSBsize, uint64_t *msbCounts, int *order, Rboolean verbose) {
size_t oldMSBsize = MSBsize;
while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2)
MSBsize--;
if (verbose && oldMSBsize != MSBsize) {
Rprintf(_("Reduced MSBsize from %zu to %zu by excluding 0 and 1 counts\n"), oldMSBsize, MSBsize);
}
return MSBsize;
}

/*
OpenMP is used here to find the range and distribution of data for efficient
grouping and sorting.
Expand Down Expand Up @@ -252,12 +262,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) {

if (verbose) {
Rprintf(_("Top 20 MSB counts: ")); for(int i=0; i<MIN(MSBsize,20); i++) Rprintf(_("%"PRId64" "), (int64_t)msbCounts[order[i]]); Rprintf(_("\n"));
Rprintf(_("Reduced MSBsize from %zu to "), MSBsize);
}
while (MSBsize>0 && msbCounts[order[MSBsize-1]] < 2) MSBsize--;
if (verbose) {
Rprintf(_("%zu by excluding 0 and 1 counts\n"), MSBsize);
}
MSBsize = shrinkMSB(MSBsize, msbCounts, order, verbose);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I might have assumed this helper would be by-reference on MSBsize; OTOH, it's a single size_t and this is only done once in fsort(). The impact should not be noticed.


bool failed=false, alloc_fail=false, non_monotonic=false; // shared bools only ever assigned true; no need for atomic or critical assign
t[6] = wallclock();
Expand Down
3 changes: 1 addition & 2 deletions src/gsumm.c
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,6 @@ void *gather(SEXP x, bool *anyNA)
{
double started=wallclock();
const bool verbose = GetVerbose();
if (verbose) Rprintf(_("gather took ... "));
switch (TYPEOF(x)) {
case LGLSXP: case INTSXP: {
const int *restrict thisx = INTEGER(x);
Expand Down Expand Up @@ -341,7 +340,7 @@ void *gather(SEXP x, bool *anyNA)
default :
error(_("gather implemented for INTSXP, REALSXP, and CPLXSXP but not '%s'"), type2char(TYPEOF(x))); // # nocov
}
if (verbose) { Rprintf(_("%.3fs\n"), wallclock()-started); }
if (verbose) { Rprintf(_("gather took %.3fs\n"), wallclock()-started); }
return gx;
}

Expand Down
6 changes: 3 additions & 3 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -222,9 +222,9 @@ void attribute_visible R_init_data_table(DllInfo *info)
if (ld != 0.0) error(_("Checking memset(&ld, 0, sizeof(long double)); ld == (long double)0.0 %s"), msg);

// Check unsigned cast used in fread.c. This isn't overflow/underflow, just cast.
if ((uint_fast8_t)('0'-'/') != 1) error(_("The ascii character '/' is not just before '0'"));
if ((uint_fast8_t)('0'-'/') != 1) error(_("Unlike the very common case, e.g. ASCII, the character '/' is not just before '0'."));
if ((uint_fast8_t)('/'-'0') < 10) error(_("The C expression (uint_fast8_t)('/'-'0')<10 is true. Should be false."));
if ((uint_fast8_t)(':'-'9') != 1) error(_("The ascii character ':' is not just after '9'"));
if ((uint_fast8_t)(':'-'9') != 1) error(_("Unlike the very common case, e.g. ASCII, the character ':' is not just after '9'."));
if ((uint_fast8_t)('9'-':') < 10) error(_("The C expression (uint_fast8_t)('9'-':')<10 is true. Should be false."));

// Variables rather than #define for NA_INT64 to ensure correct usage; i.e. not casted
Expand Down Expand Up @@ -364,7 +364,7 @@ SEXP beforeR340(void) {
extern int *_Last_updated; // assign.c

SEXP initLastUpdated(SEXP var) {
if (!isInteger(var) || LENGTH(var)!=1) error(_(".Last.value in namespace is not a length 1 integer"));
if (!isInteger(var) || LENGTH(var)!=1) error(_(".Last.updated in namespace is not a length 1 integer"));
_Last_updated = INTEGER(var);
return R_NilValue;
}
Expand Down
Loading