diff --git a/R/data.table.R b/R/data.table.R index 7b48704a1..fe0f42a56 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -199,7 +199,7 @@ replace_dot_alias = function(e) { } return(x) } - if (!mult %chin% c("first","last","all")) stopf("mult argument can only be 'first', 'last' or 'all'") + if (!mult %chin% c("first", "last", "all")) stopf("mult argument can only be 'first', 'last' or 'all'") missingroll = missing(roll) if (length(roll)!=1L || is.na(roll)) stopf("roll must be a single TRUE, FALSE, positive/negative integer/double including +Inf and -Inf or 'nearest'") if (is.character(roll)) { diff --git a/src/bmerge.c b/src/bmerge.c index f6f640e71..47d6a1456 100644 --- a/src/bmerge.c +++ b/src/bmerge.c @@ -49,8 +49,10 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r // iArg, xArg, icolsArg and xcolsArg idtVec = SEXPPTR_RO(idt); // set globals so bmerge_r can see them. xdtVec = SEXPPTR_RO(xdt); - if (!isInteger(icolsArg)) internal_error(__func__, "icols is not integer vector"); // # nocov - if (!isInteger(xcolsArg)) internal_error(__func__, "xcols is not integer vector"); // # nocov + if (!isInteger(icolsArg)) + internal_error(__func__, "icols is not integer vector"); // # nocov + if (!isInteger(xcolsArg)) + internal_error(__func__, "xcols is not integer vector"); // # nocov if ((LENGTH(icolsArg)==0 || LENGTH(xcolsArg)==0) && LENGTH(idt)>0) // We let through LENGTH(i) == 0 for tests 2126.* internal_error(__func__, "icols and xcols must be non-empty integer vectors"); if (LENGTH(icolsArg) > LENGTH(xcolsArg)) internal_error(__func__, "length(icols) [%d] > length(xcols) [%d]", LENGTH(icolsArg), LENGTH(xcolsArg)); // # nocov @@ -60,10 +62,14 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r iN = ilen = anslen = LENGTH(idt) ? LENGTH(VECTOR_ELT(idt,0)) : 0; ncol = LENGTH(icolsArg); // there may be more sorted columns in x than involved in the join for(int col=0; colLENGTH(idt) || icols[col]<1) error(_("icols[%d]=%d outside range [1,length(i)=%d]"), col, icols[col], LENGTH(idt)); - if (xcols[col]>LENGTH(xdt) || xcols[col]<1) error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt)); + if (icols[col]==NA_INTEGER) + internal_error(__func__, "icols[%d] is NA", col); // # nocov + if (xcols[col]==NA_INTEGER) + internal_error(__func__, "xcols[%d] is NA", col); // # nocov + if (icols[col]>LENGTH(idt) || icols[col]<1) + error(_("icols[%d]=%d outside range [1,length(i)=%d]"), col, icols[col], LENGTH(idt)); + if (xcols[col]>LENGTH(xdt) || xcols[col]<1) + error(_("xcols[%d]=%d outside range [1,length(x)=%d]"), col, xcols[col], LENGTH(xdt)); int it = TYPEOF(VECTOR_ELT(idt, icols[col]-1)); int xt = TYPEOF(VECTOR_ELT(xdt, xcols[col]-1)); if (iN && it!=xt) @@ -75,11 +81,14 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r // rollArg, rollendsArg roll = 0.0; rollToNearest = FALSE; if (isString(rollarg)) { - if (strcmp(CHAR(STRING_ELT(rollarg,0)),"nearest") != 0) error(_("roll is character but not 'nearest'")); - if (ncol>0 && TYPEOF(VECTOR_ELT(idt, icols[ncol-1]-1))==STRSXP) error(_("roll='nearest' can't be applied to a character column, yet.")); + if (strcmp(CHAR(STRING_ELT(rollarg,0)),"nearest") != 0) + error(_("roll is character but not 'nearest'")); + if (ncol>0 && TYPEOF(VECTOR_ELT(idt, icols[ncol-1]-1))==STRSXP) + error(_("roll='nearest' can't be applied to a character column, yet.")); roll=1.0; rollToNearest=TRUE; // the 1.0 here is just any non-0.0, so roll!=0.0 can be used later } else { - if (!isReal(rollarg)) internal_error(__func__, "roll is not character or double"); // # nocov + if (!isReal(rollarg)) + internal_error(__func__, "roll is not character or double"); // # nocov roll = REAL(rollarg)[0]; // more common case (rolling forwards or backwards) or no roll when 0.0 } rollabs = fabs(roll); @@ -98,10 +107,14 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r } // mult arg - if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "all")) mult = ALL; - else if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "first")) mult = FIRST; - else if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "last")) mult = LAST; - else internal_error(__func__, "invalid value for 'mult'"); // # nocov + if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "all")) + mult = ALL; + else if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "first")) + mult = FIRST; + else if (!strcmp(CHAR(STRING_ELT(multArg, 0)), "last")) + mult = LAST; + else + internal_error(__func__, "invalid value for 'mult'"); // # nocov // opArg if (!isInteger(opArg) || length(opArg)!=ncol) @@ -132,7 +145,8 @@ SEXP bmerge(SEXP idt, SEXP xdt, SEXP icolsArg, SEXP xcolsArg, SEXP xoArg, SEXP r retLength = R_Calloc(anslen, int); retIndex = R_Calloc(anslen, int); // initialise retIndex here directly, as next loop is meant for both equi and non-equi joins - for (int j=0; j1) allLen1[0] = FALSE; + if (len>1) { + if (mult==ALL) + allLen1[0] = FALSE; // bmerge()$allLen1 + } if (nqmaxgrp == 1) { - const int rf = (mult!=LAST) ? xlow+2-rollLow : xupp+rollUpp; // extra +1 for 1-based indexing at R level - const int rl = (mult==ALL) ? len : 1; + const int rf = (mult!=LAST) ? xlow+2-rollLow : xupp+rollUpp; // bmerge()$starts thus extra +1 for 1-based indexing at R level + const int rl = (mult==ALL) ? len : 1; // bmerge()$lens for (int j=ilow+1; jlimit) error(_("Join results in %d rows; more than %d = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE. Otherwise, please search for this error message in the FAQ, Wiki, Stack Overflow and data.table issue tracker for advice."), reslen, (int)limit); + if (limit<0) + error(_("clamp must be positive")); // # nocov + if (reslen>limit) + error(_("Join results in %d rows; more than %d = nrow(x)+nrow(i). Check for duplicate key values in i each of which join to the same group in x over and over again. If that's ok, try by=.EACHI to run j for each group to avoid the large allocation. If you are sure you wish to proceed, rerun with allow.cartesian=TRUE. Otherwise, please search for this error message in the FAQ, Wiki, Stack Overflow and data.table issue tracker for advice."), reslen, (int)limit); } SEXP ans = PROTECT(allocVector(INTSXP, reslen)); int *ians = INTEGER(ans);