Skip to content

Commit

Permalink
Merge branch 'master' into revert-6122-disable-covr
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico authored Sep 26, 2024
2 parents 25b0de9 + 623bee9 commit ad18c93
Show file tree
Hide file tree
Showing 12 changed files with 77 additions and 58 deletions.
46 changes: 26 additions & 20 deletions src/assign.c
Original file line number Diff line number Diff line change
Expand Up @@ -437,7 +437,8 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
values = PROTECT(eval(PROTECT(lang2(sym_as_posixct, values)), R_GlobalEnv)); protecti+=2;
}
bool RHS_list_of_columns = TYPEOF(values)==VECSXP && length(cols)>1; // initial value; may be revised below
if (verbose) Rprintf(_("RHS_list_of_columns == %s\n"), RHS_list_of_columns ? "true" : "false");
if (verbose)
Rprintf("RHS_list_of_columns == %s\n", RHS_list_of_columns ? "true" : "false"); // # notranslate
if (TYPEOF(values)==VECSXP && length(cols)==1 && length(values)==1) {
SEXP item = VECTOR_ELT(values,0);
if (isNull(item) || length(item)==1 || length(item)==targetlen) {
Expand Down Expand Up @@ -744,6 +745,12 @@ SEXP assign(SEXP dt, SEXP rows, SEXP cols, SEXP newcolnames, SEXP values)
#define MSGSIZE 1000
static char memrecycle_message[MSGSIZE+1]; // returned to rbindlist so it can prefix with which one of the list of data.table-like objects

const char *columnDesc(int colnum, const char *colname) {
static char column_desc[MSGSIZE+1]; // can contain column names, hence relatively large allocation.
snprintf(column_desc, MSGSIZE, _("(column %d named '%s')"), colnum, colname);
return column_desc;
}

const char *memrecycle(const SEXP target, const SEXP where, const int start, const int len, SEXP source, const int sourceStart, const int sourceLen, const int colnum, const char *colname)
// like memcpy but recycles single-item source
// 'where' a 1-based INTEGER vector subset of target to assign to, or NULL or integer()
Expand Down Expand Up @@ -932,18 +939,17 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
// inside BODY that cater for 'where' or not. Maybe there's a way to merge the two macros in future.
// The idea is to do these range checks without calling coerceVector() (which allocates)

#define CHECK_RANGE(STYPE, RFUN, COND, FMT, TO, FMTVAL) {{ \
#define CHECK_RANGE(STYPE, RFUN, COND, FMTVAL, FMT) {{ \
const STYPE *sd = (const STYPE *)RFUN(source); \
for (int i=0; i<slen; ++i) { \
const STYPE val = sd[i+soff]; \
if (COND) { \
const char *sType = sourceIsI64 ? "integer64" : type2char(TYPEOF(source)); \
const char *tType = targetIsI64 ? "integer64" : type2char(TYPEOF(target)); \
snprintf(memrecycle_message, MSGSIZE, \
colnum == 0 \
? _("%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s' (target vector)") \
: _("%"FMT" (type '%s') at RHS position %d "TO" when assigning to type '%s' (column %d named '%s')"), \
FMTVAL, sType, i+1, tType, colnum, colname); \
snprintf(memrecycle_message, MSGSIZE, FMT, \
FMTVAL, sType, i+1, tType, \
/* NB: important for () to be part of the translated string as a signal of nominative case to translators */ \
colnum == 0 ? _("(target vector)") : columnDesc(colnum, colname)); \
/* string returned so that rbindlist/dogroups can prefix it with which item of its list this refers to */ \
break; \
} \
Expand All @@ -953,36 +959,36 @@ const char *memrecycle(const SEXP target, const SEXP where, const int start, con
switch(TYPEOF(target)) {
case LGLSXP:
switch (TYPEOF(source)) {
case RAWSXP: CHECK_RANGE(Rbyte, RAW, val!=0 && val!=1, "d", "taken as TRUE", val)
case INTSXP: CHECK_RANGE(int, INTEGER, val!=0 && val!=1 && val!=NA_INTEGER, "d", "taken as TRUE", val)
case RAWSXP: CHECK_RANGE(Rbyte, RAW, val!=0 && val!=1, val, _("%d (type '%s') at RHS position %d taken as TRUE when assigning to type '%s' %s"))
case INTSXP: CHECK_RANGE(int, INTEGER, val!=0 && val!=1 && val!=NA_INTEGER, val, _("%d (type '%s') at RHS position %d taken as TRUE when assigning to type '%s' %s"))
case REALSXP: if (sourceIsI64)
CHECK_RANGE(int64_t, REAL, val!=0 && val!=1 && val!=NA_INTEGER64, PRId64, "taken as TRUE", val)
else CHECK_RANGE(double, REAL, !ISNAN(val) && val!=0.0 && val!=1.0, "f", "taken as TRUE", val)
CHECK_RANGE(int64_t, REAL, val!=0 && val!=1 && val!=NA_INTEGER64, val, _("%"PRId64" (type '%s') at RHS position %d taken as TRUE when assigning to type '%s' %s"))
else CHECK_RANGE(double, REAL, !ISNAN(val) && val!=0.0 && val!=1.0, val, _("%f (type '%s') at RHS position %d taken as TRUE when assigning to type '%s' %s"))
} break;
case RAWSXP:
switch (TYPEOF(source)) {
case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, "d", "taken as 0", val)
case INTSXP: CHECK_RANGE(int, INTEGER, val<0 || val>255, val, _("%d (type '%s' at RHS position %d taken as 0 when assigning to type '%s' %s"))
case REALSXP: if (sourceIsI64)
CHECK_RANGE(int64_t, REAL, val<0 || val>255, PRId64, "taken as 0", val)
else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, "f", "either truncated (precision lost) or taken as 0", val)
CHECK_RANGE(int64_t, REAL, val<0 || val>255, val, _("%"PRId64" (type '%s') at RHS position %d taken as 0 when assigning to type '%s' %s"))
else CHECK_RANGE(double, REAL, !R_FINITE(val) || val<0.0 || val>256.0 || (int)val!=val, val, _("%f (type '%s') at RHS position %d either truncated (precision lost) or taken as 0 when assigning to type '%s' %s"))
} break;
case INTSXP:
switch (TYPEOF(source)) {
case REALSXP: if (sourceIsI64)
CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), PRId64, "out-of-range (NA)", val)
else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val)
CHECK_RANGE(int64_t, REAL, val!=NA_INTEGER64 && (val<=NA_INTEGER || val>INT_MAX), val, _("%"PRId64" (type '%s') at RHS position %d out-of-range (NA) when assigning to type '%s' %s"))
else CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int32_repres(val) || (int)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s"))
case CPLXSXP: CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) &&
(ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r)
(ISNAN(val.r) || (within_int32_repres(val.r) && (int)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s"))
} break;
case REALSXP:
switch (TYPEOF(source)) {
case REALSXP: if (targetIsI64 && !sourceIsI64)
CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), "f", "out-of-range(NA) or truncated (precision lost)", val)
CHECK_RANGE(double, REAL, !ISNAN(val) && (!within_int64_repres(val) || (int64_t)val!=val), val, _("%f (type '%s') at RHS position %d out-of-range(NA) or truncated (precision lost) when assigning to type '%s' %s"))
break;
case CPLXSXP: if (targetIsI64)
CHECK_RANGE(Rcomplex, COMPLEX, !((ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)) &&
(ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), "f", "either imaginary part discarded or real part truncated (precision lost)", val.r)
else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), "f", "imaginary part discarded", val.i)
(ISNAN(val.r) || (R_FINITE(val.r) && (int64_t)val.r==val.r))), val.r, _("%f (type '%s') at RHS position %d either imaginary part discarded or real part truncated (precision lost) when assigning to type '%s' %s"))
else CHECK_RANGE(Rcomplex, COMPLEX, !(ISNAN(val.i) || (R_FINITE(val.i) && val.i==0.0)), val.r, _("%f (type '%s') at RHS position %d imaginary part discarded when assigning to type '%s' %s"))
}
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/bmerge.c
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r
if (xcols[col]>LENGTH(xdt) || xcols[col]<1) error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt));
int it = TYPEOF(VECTOR_ELT(idt, icols[col]-1));
int xt = TYPEOF(VECTOR_ELT(xdt, xcols[col]-1));
if (iN && it!=xt) error(_("typeof x.%s (%s) != typeof i.%s (%s)"), CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it));
if (iN && it!=xt)
error("typeof x.%s (%s) != typeof i.%s (%s)", CHAR(STRING_ELT(getAttrib(xdt,R_NamesSymbol),xcols[col]-1)), type2char(xt), CHAR(STRING_ELT(getAttrib(idt,R_NamesSymbol),icols[col]-1)), type2char(it)); // # notranslate
if (iN && it!=LGLSXP && it!=INTSXP && it!=REALSXP && it!=STRSXP)
error(_("Type '%s' is not supported for joining/merging"), type2char(it));
}
Expand Down
12 changes: 8 additions & 4 deletions src/dogroups.c
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
}
setAttrib(BY, R_NamesSymbol, bynames); // Fix for #42 - BY doesn't retain names anymore
R_LockBinding(sym_BY, env);
if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols))) error(_("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]"),length(bynames),length(groups),length(grpcols));
if (isNull(jiscols) && (length(bynames)!=length(groups) || length(bynames)!=length(grpcols)))
error("!length(bynames)[%d]==length(groups)[%d]==length(grpcols)[%d]", length(bynames), length(groups), length(grpcols)); // # notranslate
// TO DO: check this check above.

N = PROTECT(findVar(install(".N"), env)); nprotect++; // PROTECT for rchk
Expand Down Expand Up @@ -135,7 +136,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
// fetch names of .SD and prepare symbols. In case they are copied-on-write by user assigning to those variables
// using <- in j (which is valid, useful and tested), they are repointed to the .SD cols for each group.
SEXP names = PROTECT(getAttrib(SDall, R_NamesSymbol)); nprotect++;
if (length(names) != length(SDall)) error(_("length(names)!=length(SD)"));
if (length(names) != length(SDall))
error("length(names)!=length(SD)"); // # notranslate
SEXP *nameSyms = (SEXP *)R_alloc(length(names), sizeof(SEXP));

for(int i=0; i<length(SDall); ++i) {
Expand All @@ -151,7 +153,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
}

SEXP xknames = PROTECT(getAttrib(xSD, R_NamesSymbol)); nprotect++;
if (length(xknames) != length(xSD)) error(_("length(xknames)!=length(xSD)"));
if (length(xknames) != length(xSD))
error("length(xknames)!=length(xSD)"); // # notranslate
SEXP *xknameSyms = (SEXP *)R_alloc(length(xknames), sizeof(SEXP));
for(int i=0; i<length(xSD); ++i) {
if (SIZEOF(VECTOR_ELT(xSD, i))==0)
Expand Down Expand Up @@ -400,7 +403,8 @@ SEXP dogroups(SEXP dt, SEXP dtcols, SEXP groups, SEXP grpcols, SEXP jiscols, SEX
} else {
estn = ((double)ngrp/i)*1.1*(ansloc+maxn);
if (verbose) Rprintf(_("dogroups: growing from %d to %d rows\n"), length(VECTOR_ELT(ans,0)), estn);
if (length(ans) != ngrpcols + njval) error(_("dogroups: length(ans)[%d]!=ngrpcols[%d]+njval[%d]"),length(ans),ngrpcols,njval);
if (length(ans) != ngrpcols + njval)
error("dogroups: length(ans)[%d]!=ngrpcols[%d]+njval[%d]", length(ans), ngrpcols, njval); // # notranslate
for (int j=0; j<length(ans); ++j) SET_VECTOR_ELT(ans, j, growVector(VECTOR_ELT(ans,j), estn));
}
}
Expand Down
7 changes: 3 additions & 4 deletions src/forder.c
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
}
if (key[nradix]!=NULL) nradix++; // nradix now number of bytes in key
#ifdef TIMING_ON
Rprintf(_("nradix=%d\n"), nradix);
Rprintf("nradix=%d\n", nradix); // # notranslate
#endif

// global nth, TMP & UGRP
Expand Down Expand Up @@ -869,9 +869,8 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP retStatsArg, SEXP sortGroupsA
for (int i=0; i<=last; i++) {
Rprintf(_("Timing block %2d%s = %8.3f %8d\n"), i, (i>=17&&i<=19)?"(*)":" ", tblock[i], nblock[i]);
}
for (int i=0; i<=256; i++) {
if (stat[i]) Rprintf(_("stat[%03d]==%20"PRIu64"\n"), i, (uint64_t)stat[i]);
}
for (int i=0; i<=256; i++) if (stat[i])
Rprintf("stat[%03d]==%20"PRIu64"\n", i, (uint64_t)stat[i]); // # notranslate
}
#endif
return ans;
Expand Down
14 changes: 7 additions & 7 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -1340,10 +1340,10 @@ int freadMain(freadMainArgs _args) {
if (*NAstrings == NULL) {
DTPRINT(_(" No NAstrings provided.\n"));
} else {
DTPRINT(_(" NAstrings = ["));
DTPRINT(" NAstrings = ["); // # notranslate
const char * const* s = NAstrings;
while (*s++) DTPRINT(*s? "<<%s>>, " : "<<%s>>", s[-1]);
DTPRINT(_("]\n"));
DTPRINT("]\n"); // # notranslate
if (any_number_like_NAstrings)
DTPRINT(_(" One or more of the NAstrings looks like a number.\n"));
else
Expand Down Expand Up @@ -2085,14 +2085,14 @@ int freadMain(freadMainArgs _args) {
// sd can be very close to 0.0 sometimes, so apply a +10% minimum
// blank lines have length 1 so for fill=true apply a +100% maximum. It'll be grown if needed.
if (verbose) {
DTPRINT(_(" =====\n"));
DTPRINT(" =====\n"); // # notranslate
DTPRINT(_(" Sampled %"PRIu64" rows (handled \\n inside quoted fields) at %d jump points\n"), (uint64_t)sampleLines, nJumps);
DTPRINT(_(" Bytes from first data row on line %d to the end of last row: %"PRIu64"\n"), row1line, (uint64_t)bytesRead);
DTPRINT(_(" Line length: mean=%.2f sd=%.2f min=%d max=%d\n"), meanLineLen, sd, minLen, maxLen);
DTPRINT(_(" Estimated number of rows: %"PRIu64" / %.2f = %"PRIu64"\n"), (uint64_t)bytesRead, meanLineLen, (uint64_t)estnrow);
DTPRINT(_(" Initial alloc = %"PRIu64" rows (%"PRIu64" + %d%%) using bytes/max(mean-2*sd,min) clamped between [1.1*estn, 2.0*estn]\n"),
(uint64_t)allocnrow, (uint64_t)estnrow, (int)(100.0*allocnrow/estnrow-100.0));
DTPRINT(_(" =====\n"));
DTPRINT(" =====\n"); // # notranslate
} else {
if (sampleLines > allocnrow) INTERNAL_STOP("sampleLines(%"PRIu64") > allocnrow(%"PRIu64")", (uint64_t)sampleLines, (uint64_t)allocnrow); // # nocov
}
Expand Down Expand Up @@ -2266,8 +2266,8 @@ int freadMain(freadMainArgs _args) {
if (verbose) DTPRINT(_("[11] Read the data\n"));
read: // we'll return here to reread any columns with out-of-sample type exceptions, or dirty jumps
restartTeam = false;
if (verbose) DTPRINT(_(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n"),
jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos));
if (verbose)
DTPRINT(" jumps=[%d..%d), chunk_size=%"PRIu64", total_size=%"PRIu64"\n", jump0, nJumps, (uint64_t)chunkBytes, (uint64_t)(eof-pos)); // # notranslate
ASSERT(allocnrow <= nrowLimit, "allocnrow(%"PRIu64") <= nrowLimit(%"PRIu64")", (uint64_t)allocnrow, (uint64_t)nrowLimit);
#pragma omp parallel num_threads(nth)
{
Expand Down Expand Up @@ -2744,7 +2744,7 @@ int freadMain(freadMainArgs _args) {
}

if (verbose) {
DTPRINT(_("=============================\n"));
DTPRINT("=============================\n"); // # notranslate
if (tTot<0.000001) tTot=0.000001; // to avoid nan% output in some trivially small tests where tot==0.000s
DTPRINT(_("%8.3fs (%3.0f%%) Memory map %.3fGB file\n"), tMap-t0, 100.0*(tMap-t0)/tTot, 1.0*fileSize/(1024*1024*1024));
DTPRINT(_("%8.3fs (%3.0f%%) sep="), tLayout-tMap, 100.0*(tLayout-tMap)/tTot);
Expand Down
5 changes: 3 additions & 2 deletions src/freadR.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,8 @@ SEXP freadR(
args.skipEmptyLines = LOGICAL(skipEmptyLinesArg)[0];
args.fill = INTEGER(fillArg)[0];
args.showProgress = LOGICAL(showProgressArg)[0];
if (INTEGER(nThreadArg)[0]<1) error(_("nThread(%d)<1"), INTEGER(nThreadArg)[0]);
if (INTEGER(nThreadArg)[0]<1)
error("nThread(%d)<1", INTEGER(nThreadArg)[0]); // # notranslate
args.nth = (uint32_t)INTEGER(nThreadArg)[0];
args.verbose = verbose;
args.warningsAreErrors = warningsAreErrors;
Expand Down Expand Up @@ -716,7 +717,7 @@ void __halt(bool warn, const char *format, ...) {
// if (warn) warning(_("%s"), msg);
// this warning() call doesn't seem to honor warn=2 straight away in R 3.6, so now always call error() directly to be sure
// we were going via warning() before to get the (converted from warning) prefix in the message (which we could mimic in future)
error(_("%s"), msg); // include "%s" because data in msg might include '%'
error("%s", msg); // # notranslate. include "%s" because data in msg might include '%'
}

void prepareThreadContext(ThreadLocalFreadParsingContext *ctx) {}
Expand Down
6 changes: 4 additions & 2 deletions src/fsort.c
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) {

int nth = getDTthreads(xlength(x), true);
int nBatch=nth*2; // at least nth; more to reduce last-man-home; but not too large to keep counts small in cache
if (verbose) Rprintf(_("nth=%d, nBatch=%d\n"),nth,nBatch);
if (verbose)
Rprintf("nth=%d, nBatch=%d\n", nth, nBatch); // # notranslate

size_t batchSize = (xlength(x)-1)/nBatch + 1;
if (batchSize < 1024) batchSize = 1024; // simple attempt to work reasonably for short vector. 1024*8 = 2 4kb pages
Expand Down Expand Up @@ -185,7 +186,8 @@ SEXP fsort(SEXP x, SEXP verboseArg) {
int MSBNbits = maxBit > 15 ? 16 : maxBit+1; // how many bits make up the MSB
int shift = maxBit + 1 - MSBNbits; // the right shift to leave the MSB bits remaining
size_t MSBsize = 1LL<<MSBNbits; // the number of possible MSB values (16 bits => 65,536)
if (verbose) Rprintf(_("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n"), maxBit, MSBNbits, shift, MSBsize);
if (verbose)
Rprintf("maxBit=%d; MSBNbits=%d; shift=%d; MSBsize=%zu\n", maxBit, MSBNbits, shift, MSBsize); // # notranslate

uint64_t *counts = (uint64_t *)R_alloc(nBatch*MSBsize, sizeof(uint64_t));
memset(counts, 0, nBatch*MSBsize*sizeof(uint64_t));
Expand Down
Loading

0 comments on commit ad18c93

Please sign in to comment.