Skip to content

Commit

Permalink
#420 example in man page
Browse files Browse the repository at this point in the history
  • Loading branch information
gagolews committed May 2, 2021
1 parent 8e3d041 commit 5c27f63
Show file tree
Hide file tree
Showing 234 changed files with 479 additions and 413 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stringi
Version: 1.6.1
Date: 2021-05-01
Date: 2021-05-03
Title: Character String Processing Facilities
Description: A multitude of character string/text/natural language
processing tools: pattern searching (e.g., with 'Java'-like regular
Expand Down
20 changes: 12 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

.PHONY: r check build clean purge sphinx test

PKGNAME="stringi"

all: r

autoconf:
Expand All @@ -20,27 +22,28 @@ r-icu-bundle:
R CMD INSTALL . --configure-args='--disable-pkg-config'

tinytest:
Rscript -e 'options(width=120); source("devel/tinytest.R")'
Rscript -e 'source("devel/tinytest.R")'

test: r tinytest

build:
cd .. && R CMD INSTALL stringi --preclean --html
cd .. && R CMD build stringi
cd .. && R CMD INSTALL ${PKGNAME} --preclean --html
cd .. && R CMD build ${PKGNAME}
make clean

check: build
cd .. && R CMD check `ls -t stringi*.tar.gz | head -1` --no-manual
cd .. && R CMD check `ls -t ${PKGNAME}*.tar.gz | head -1` --no-manual

check-cran: build
cd .. && STRINGI_DISABLE_PKG_CONFIG=1 R CMD check `ls -t stringi*.tar.gz | head -1` --as-cran
cd .. && STRINGI_DISABLE_PKG_CONFIG=1 R CMD check `ls -t ${PKGNAME}*.tar.gz | head -1` --as-cran

weave:
cd devel/sphinx/weave && make && cd ../../../

rd2rst:
# https://github.com/gagolews/Rd2rst
cd devel/sphinx && Rscript -e "Rd2rst::Rd2rst('stringi')" && cd ../../
# TODO: if need be, you can also use MyST in the future
cd devel/sphinx && Rscript -e "Rd2rst::Rd2rst('${PKGNAME}')" && cd ../../

news:
cd devel/sphinx && pandoc ../../NEWS -f markdown -t rst -o news.rst
Expand All @@ -56,11 +59,12 @@ sphinx: r weave rd2rst news
touch docs/.nojekyll

clean:
find src -name '*.o' -exec rm {} \;
find src -name '*.so' -exec rm {} \;
rm -f src/*.o src/*.so # will not remove src/icuXY/*/*.o
rm -f src/Makevars src/uconfig_local.h \
src/install.libs.R config.log config.status src/symbols.rds

purge: clean
find src -name '*.o' -exec rm {} \;
find src -name '*.so' -exec rm {} \;
rm -f man/*.Rd
rm -fr autom4te.cache
3 changes: 3 additions & 0 deletions NEWS
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@
(e.g., for ordering data frames with regard to multiple criteria,
the ranks can be passed to `order()`, see #219).

* [NEW FEATURE] `%s$%` and `%stri$%` are now vectorised with respect to
both arguments.

* [BUGFIX] `stri_sort_key()` now outputs `bytes`-encoded strings.

* [BUGFIX] #415: `locale=''` was not equivalent to `locale=NULL`
Expand Down
31 changes: 22 additions & 9 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -231,15 +231,20 @@ stri_replace_na <- function(str, replacement = "NA")
#' Provides access to base R's \code{\link{sprintf}} in form of a binary
#' operator in a way similar to Python's \code{\%} overloaded for strings.
#'
#'
#'
#' @details
#' Vectorized over \code{e2}.
#' Vectorized over \code{e1} and \code{e2}.
#'
#' \code{e1 \%s$\% atomic_vector} is equivalent to
#' \code{e1 \%s$\% list(atomic_vector)}.
#'
#' Note that \code{\link{sprintf}} takes field width in bytes,
#' not Unicode code points. See Examples for a workaround.
#'
#' @param e1 a single format string, see \code{\link{sprintf}} for syntax
#' @param e2 a list of arguments to be passed to \code{\link{sprintf}}
#'
#' @param e1 format strings, see \code{\link{sprintf}} for syntax
#' @param e2 a list of atomic vectors to be passed to \code{\link{sprintf}}
#' or a single atomic vector
#'
#' @return
Expand All @@ -254,6 +259,10 @@ stri_replace_na <- function(str, replacement = "NA")
#' "%s='%d'" %s$% list(c("a", "b", "c"), 1)
#' "%s='%d'" %s$% list(c("a", "b", "c"), 1:3)
#'
#' # sprintf field width:
#' cat(sprintf("%6s%6s%6s", "-", c("asc", "ąść", "abcdefg"), "-"), sep="\n")
#' cat(sprintf("%6s%s%6s", "-", stringi::stri_pad(c("asc", "ąść", "abcdefg"), 6), "-"), sep="\n")
#'
#' @rdname operator_dollar
#' @aliases operator_dollar oper_dollar
#'
Expand All @@ -263,25 +272,29 @@ stri_replace_na <- function(str, replacement = "NA")
#' @export
`%s$%` <- function(e1, e2)
{
stopifnot(is.character(e1), length(e1) == 1, !is.na(e1))



if (!is.list(e2))
e2 <- list(e2)

# this is stringi, ensure UTF-8 output and proper NA handling!
e1 <- stri_enc_toutf8(as.character(e1))
if (length(e1) == 0) return(character(0))

for (i in seq_along(e2)) {
if (is.character(e2[[i]])) {
stopifnot(is.atomic(e2[[i]])) # factor is atomic
if (length(e2[[i]]) == 0) return(character(0))
if (is.character(e2[[i]]) || is.factor(e2[[i]])) {
e2[[i]] <- stri_enc_toutf8(e2[[i]])
}
}

ret <- stri_enc_toutf8(do.call(sprintf, as.list(c(e1, e2))))
ret <- stri_enc_toutf8(do.call(sprintf, as.list(c(list(e1), e2))))
# for the time being, let stri_paste determine NAs
# (it might be too greedy if there are unused strings)
which_na <- do.call(stri_paste, e2)
ret[is.na(which_na)] <- NA_character_

ret[is.na(e1)] <- NA_character_

ret
}

Expand Down
Binary file modified devel/sphinx/_build/doctrees/environment.pickle
Binary file not shown.
Binary file modified devel/sphinx/_build/doctrees/index.doctree
Binary file not shown.
Binary file modified devel/sphinx/_build/doctrees/install.doctree
Binary file not shown.
Binary file modified devel/sphinx/_build/doctrees/news.doctree
Binary file not shown.
2 changes: 1 addition & 1 deletion devel/sphinx/_build/html/.buildinfo
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Sphinx build info version 1
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
config: 23aaba17a8c5782509698d22c2e8cdab
config: 0c0f517788ce13489b55ee3f709a2dbd
tags: 645f666f9bcd5a90fca523b33c5a78b7
9 changes: 5 additions & 4 deletions devel/sphinx/_build/html/_sources/index.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ It gives you a multitude of functions for:
* pattern searching (e.g., with ICU Java-like regular expressions),
* collation and sorting,
* random string generation,
* case mapping,
* case mapping and folding,
* string transliteration,
* Unicode normalisation,
* date-time formatting and parsing,
Expand All @@ -45,9 +45,10 @@ by calling:
It has been released under the open source BSD-3-clause
`license <https://raw.githubusercontent.com/gagolews/stringi/master/LICENSE>`_.

The package's API was inspired by Hadley Wickham's
`stringr <https://stringr.tidyverse.org/>`_ package
(and since 2015 `stringr` is powered by `stringi`).
The package's API was inspired by that of the early (pre-tidyverse; v0.6.2)
version of Hadley Wickham's
`stringr <https://cran.r-project.org/web/packages/stringr/>`_
package (and since the 2015 v1.0.0 `stringr` is powered by `stringi`).
Moreover, Hadley suggested quite a few new package features.
The contributions from Bartłomiej Tartanus and
`many others <https://github.com/gagolews/stringi/graphs/contributors>`_
Expand Down
56 changes: 27 additions & 29 deletions devel/sphinx/_build/html/_sources/install.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ by calling:

install.packages("stringi", configure.vars="ICUDT_DIR=<icudt_dir>")

Moreover, if you have **absolutely no internet access** on the machines
you try to install stringi on, try fetching the latest development
version of the package, as it is shipped with the ``ICU`` data archives.
You can build a distributable source package that includes all the
required ICU data files (for off-line use) by omitting some relevant
lines in the ``.Rbuildignore`` file. The following command sequence
should do the trick:
Moreover, if you have **no internet access** on the machines you try to
install stringi on, try fetching the latest development version of the
package, as it is shipped with the ``ICU`` data archives. You can build
a distributable source package that includes all the required ICU data
files (for off-line use) by omitting some relevant lines in the
``.Rbuildignore`` file. The following command sequence should do the
trick:

::

Expand Down Expand Up @@ -93,23 +93,23 @@ standard. Moreover, it has become required by newer versions of ICU4C.

However, if your compiler does not support C++11 or it has not been
properly configured (check out ``<R_inst_dir>/etc/Makeconf``) but you
are sure it tolerates the ``long long`` type (which is very common –
this is checked by the ``configure`` script anyway), you may disable the
use of C++11 by passing the ``--disable-cxx11`` argument to the
``configure`` script.
are sure it understands the ``long long`` type (which is very common –
this is checked by the ``configure`` script anyway), you may disable
C++11 by passing the ``--disable-cxx11`` argument to the ``configure``
script.

Customising the Build Process
-----------------------------

Additional features and options of the ``./configure`` script:

- ``--disable-cxx11``: Disable C++11 use; if you build ICU4C from
sources, make sure your C++ compiler supports the ``long long`` type.
- ``--disable-cxx11``: Disable C++11; if you build ICU4C from sources,
make sure your C++ compiler supports the ``long long`` type.

- ``--disable-icu-bundle``: Force the use of the system ICU.
- ``--disable-icu-bundle``: Enforce system ICU.

- ``--disable-pkg-config``: Disable ``pkg-config`` usage for finding
ICU4C-devel; in such a case ICU4C will be compiled from sources.
- ``--disable-pkg-config``: Disable ``pkg-config``; ICU4C will be
compiled from sources.

- ``--with-extra-cflags=FLAGS``: Additional C compiler flags.

Expand All @@ -123,32 +123,30 @@ Additional features and options of the ``./configure`` script:

Some influential environment variables:

- ``ICUDT_DIR``: Optional directory from which an already downloaded
ICU data archive (``icudt*.zip``) may be copied; either an absolute
path or a path relative to ``<package source dir>/src``; defaults to
``icu55/data``.
- ``ICUDT_DIR``: Optional directory with an already downloaded ICU data
archive (``icudt*.zip``); either an absolute path or a path relative
to ``<package source dir>/src``; defaults to ``icuXX/data``.

- ``PKG_CONFIG_PATH``: An optional list of directories to search for
``pkg-config``\ s ``.pc`` files.

- ``R_HOME``: Override default directory with R installation, e.g.,
``/usr/lib64/R``. Note that ``$R_HOME/bin/R`` should refer to the
``R`` executable.
- ``R_HOME``: Override the R directory, e.g., ``/usr/lib64/R``. Note
that ``$R_HOME/bin/R`` should point to the R executable.

- ``CAT``: The ``cat`` command used to generate the list of source
files to compile.

- ``PKG_CONFIG``: The ``pkg-config`` command used to fetch the necessary
compiler flags to link to an existing ``libicu`` installation.

- ``STRINGI_DISABLE_CXX11``: Disable C++11 flags, see also
- ``STRINGI_DISABLE_CXX11``: Disable C++11, see also
``--disable-cxx11``.

- ``STRINGI_DISABLE_PKG_CONFIG``: Force the use of the ICU bundle, see
also ``--disable-pkg-config``.
- ``STRINGI_DISABLE_PKG_CONFIG``: Compile ICU from sources, see also
``--disable-pkg-config``.

- ``STRINGI_DISABLE_ICU_BUNDLE``: Force the use of the system ICU, see
also ``--disable-icu-bundle``.
- ``STRINGI_DISABLE_ICU_BUNDLE``: Enforce system ICU, see also
``--disable-icu-bundle``.

- ``STRINGI_CFLAGS``: see ``--with-extra-cflags``.

Expand All @@ -172,4 +170,4 @@ If you do not manage to set up a successful stringi build, do not
hesitate to `file a bug
report <https://github.com/gagolews/stringi/issues>`__. However, please
check the list of archived (closed) issues first – it is very likely
that a solution to your problem is already there.
that a solution to your problem has already been posted.
11 changes: 9 additions & 2 deletions devel/sphinx/_build/html/_sources/news.rst.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,19 @@ What Is New in *stringi*
1.6.1 (2021-XX-YY) **devel**
----------------------------

- [GENERAL] #401: stringi is now bundled with ICU4C 69.1 (upgraded from
61.1), which is used on most Windows and OS X builds as well as on
\*nix systems not equipped with system ICU. However, if the C++11
support is disabled, stringi will be built against the battle-tested
ICU4C 55.1. The update to ICU brings Unicode 13.0 and CLDR 39
support.

- [DOCUMENTATION] A draft version of a paper on ``stringi`` is now
available at
https://stringi.gagolewski.com/_static/vignette/stringi.pdf

- …todo… #401 (update ICU4C to 69.1), The ICU4C bundle has been updated
from version 61.1 to 69.1 which features Unicode 13.0 and CLDR 39.
- [GENERAL] stringi now requires R >= 3.1 (``CXX_STD`` of ``CXX11`` or
``CXX1X``).

- [NEW FEATURE] #408: ``stri_trans_casefold()`` performs case folding;
this is different from case mapping, which is locale-dependent.
Expand Down
2 changes: 1 addition & 1 deletion devel/sphinx/_build/html/genindex.html
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ <h1 id="index">Index</h1>

<div role="contentinfo">
<p>
&#169; Copyright 2020, Marek Gagolewski.
&#169; Copyright 2013-2021, Marek Gagolewski.

</p>
</div>
Expand Down
11 changes: 6 additions & 5 deletions devel/sphinx/_build/html/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,7 @@ <h1>stringi: THE String Processing Package for R<a class="headerlink" href="#str
<li><p>pattern searching (e.g., with ICU Java-like regular expressions),</p></li>
<li><p>collation and sorting,</p></li>
<li><p>random string generation,</p></li>
<li><p>case mapping,</p></li>
<li><p>case mapping and folding,</p></li>
<li><p>string transliteration,</p></li>
<li><p>Unicode normalisation,</p></li>
<li><p>date-time formatting and parsing,</p></li>
Expand All @@ -215,9 +215,10 @@ <h1>stringi: THE String Processing Package for R<a class="headerlink" href="#str
<p><cite>stringi</cite>’s source code is hosted on <a class="reference external" href="https://github.com/gagolews/stringi">GitHub</a>.
It has been released under the open source BSD-3-clause
<a class="reference external" href="https://raw.githubusercontent.com/gagolews/stringi/master/LICENSE">license</a>.</p>
<p>The package’s API was inspired by Hadley Wickham’s
<a class="reference external" href="https://stringr.tidyverse.org/">stringr</a> package
(and since 2015 <cite>stringr</cite> is powered by <cite>stringi</cite>).
<p>The package’s API was inspired by that of the early (pre-tidyverse; v0.6.2)
version of Hadley Wickham’s
<a class="reference external" href="https://cran.r-project.org/web/packages/stringr/">stringr</a>
package (and since the 2015 v1.0.0 <cite>stringr</cite> is powered by <cite>stringi</cite>).
Moreover, Hadley suggested quite a few new package features.
The contributions from Bartłomiej Tartanus and
<a class="reference external" href="https://github.com/gagolews/stringi/graphs/contributors">many others</a>
Expand Down Expand Up @@ -407,7 +408,7 @@ <h1>stringi: THE String Processing Package for R<a class="headerlink" href="#str

<div role="contentinfo">
<p>
&#169; Copyright 2020, Marek Gagolewski.
&#169; Copyright 2013-2021, Marek Gagolewski.

</p>
</div>
Expand Down
Loading

0 comments on commit 5c27f63

Please sign in to comment.