Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Czech support added #8

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 11 additions & 6 deletions SnowballC/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@ Authors@R: person("Milan", "Bouchet-Valat", email="[email protected]",
Description: An R interface to the C 'libstemmer' library that implements
Porter's word stemming algorithm for collapsing words to a common
root to aid comparison of vocabulary. Currently supported languages are
Arabic, Basque, Catalan, Danish, Dutch, English, Finnish, French, German, Greek,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why remove some languages?

Hindi, Hungarian, Indonesian, Irish, Italian, Lithuanian, Nepali,
Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish, Tamil
Danish, Dutch, English, Finnish, French, German, Hungarian, Italian,
Norwegian, Portuguese, Romanian, Russian, Spanish, Swedish
and Turkish.
License: BSD_3_clause + file LICENSE
Copyright: Dr Martin Porter (2001) and Richard Boulton (2004, 2005)
for the 'libstemmer' C library,
and Milan Bouchet-Valat (2013) for the R package contents.
Copyright: Dr Martin Porter (2001) and Richard Boulton (2004, 2005) for
the 'libstemmer' C library, and Milan Bouchet-Valat (2013) for
the R package contents.
URL: https://github.com/nalimilan/R.TeMiS
BugReports: https://github.com/nalimilan/R.TeMiS/issues
NeedsCompilation: yes
Packaged: 2020-04-01 16:24:55 UTC; milan
Author: Milan Bouchet-Valat [aut, cre]
Maintainer: Milan Bouchet-Valat <[email protected]>
Repository: CRAN
Date/Publication: 2020-04-01 16:50:02 UTC
95 changes: 95 additions & 0 deletions SnowballC/MD5
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
0bac4b0072731204c1d7d25b54f71b16 *DESCRIPTION
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this file actually used? I don't think so.

99bc125b0a438dba3cfa01fb9a9e8fd6 *LICENSE
b61e83bbd759e889c8887424337e1823 *NAMESPACE
eada0b70de71c203bf751e6521ff0d8c *NEWS
b513ed2bf584d2785c2587a213650138 *R/stem.R
846e2cc06cc63146a7db4324e70d37f7 *inst/words.R
6450f60353bf64dfb3fb27f7b47a287d *inst/words/arabic.RData
a4b699a8607bdd110d01bb7166a91b8d *inst/words/basque.RData
11eae93b28980e558d1cadffac9f3654 *inst/words/catalan.RData
4d22f94310122d16d0255a271a3fac7c *inst/words/danish.RData
70b0d424c87664a50f05f3305be9badf *inst/words/dutch.RData
8b99334b63f76f5a5d1cd1eee84a8c0c *inst/words/english.RData
2575910382189eb55832547ba9506436 *inst/words/finnish.RData
12b44b31e52db934da4aee2fd8f51d87 *inst/words/french.RData
d7de4275726019815bf21446355beeb8 *inst/words/german.RData
8f219a61aedc4b183a640c4be7b4c76e *inst/words/greek.RData
5fe61461702aca290f3edc4f9fc0f9fe *inst/words/hindi.RData
507e68c5b9ea43119c394614ae2a0f82 *inst/words/hungarian.RData
79973d5437212d6fcd1fa4685ca21658 *inst/words/indonesian.RData
c32b0b7607daff0f5e6bcf7cb658e22c *inst/words/irish.RData
085956d37b512c5af95654d4d5afa6d1 *inst/words/italian.RData
0f484d0bce7dcb3c828ca018b4727a3f *inst/words/lithuanian.RData
1a35c9562b0e90015ba6996d019610fb *inst/words/nepali.RData
d7b7b8a277d8335880c34755c12f15a0 *inst/words/norwegian.RData
3918bfa0f782c6698f8fdbf78a0a41af *inst/words/porter.RData
8853c24d92a950f48d9cac6f809afabc *inst/words/portuguese.RData
7c195efe6cfda0f07d6c36e5b0773e83 *inst/words/romanian.RData
6fdbb7f751b657898f1f0a4936357fef *inst/words/russian.RData
2c6a01db0ccb42de4e4e67a49f14e83b *inst/words/spanish.RData
bbcd85a9eb03ecbd409b1f942aa10d5b *inst/words/swedish.RData
c17e17cedd554ab4f32f98927c86f18d *inst/words/tamil.RData
123e76d7593bddd3bcf1365a5b6f16ef *inst/words/turkish.RData
5834354a58bc2954926203a024097284 *man/getStemLanguages.Rd
6470f197eb9ade66ef751a437b3aa55c *man/wordStem.Rd
5164d6c8dddffcef63ffde83bd0787ea *src/SnowballC_init.c
afecf06b1a49f45c0ae5e7966e1f55e8 *src/api.c
3463c7786b95691c5ea34988c7e6dcd0 *src/api.h
5cbdbe434d6f76bbf13c3a6f34d0fffc *src/header.h
e15a358acc0b1263cbf0518a78954e62 *src/libstemmer.h
dfd1562c0fec4117b918348d894a1b35 *src/libstemmer_utf8.c
025a309bfcb95d86b0ff68dc1c4e4539 *src/modules_utf8.h
8507d877fc894bfe04a001977ee0ec29 *src/stem.c
956228113294ed2d1fbe9dec0c4a42a0 *src/stem_UTF_8_arabic.c
460b9f5afb49daa57e0f5cef4d84eda5 *src/stem_UTF_8_arabic.h
d1bf2b3177cb826adaa252591222d1ba *src/stem_UTF_8_basque.c
f135c596760d9c329b47d7ec8f7f71b3 *src/stem_UTF_8_basque.h
2ebbe4fe036716fe9bf38de246fe1ae9 *src/stem_UTF_8_catalan.c
e4ca693bab46d574ad486804325ec636 *src/stem_UTF_8_catalan.h
4cf66919bc01331e31d0801e0f3515df *src/stem_UTF_8_danish.c
9d8c04c78d17512fd3d2fd2701ef2c33 *src/stem_UTF_8_danish.h
9db37371b26d49a947bc5922ad1e1936 *src/stem_UTF_8_dutch.c
9657e245e0ec26c5119f197715d6b2c1 *src/stem_UTF_8_dutch.h
cfa8d04cd2f1367dc19d42beaedbb4c6 *src/stem_UTF_8_english.c
4daca332253f79e2506c17418da55c4a *src/stem_UTF_8_english.h
de413e13cb9d433bf68ccfdddd01d1f4 *src/stem_UTF_8_finnish.c
8e5464d364b4fd5cc740ce2e627d7c00 *src/stem_UTF_8_finnish.h
cfdd9d3675d18568ba128dfee83939d4 *src/stem_UTF_8_french.c
9dd4d964c9358f42e820f257d134e9ad *src/stem_UTF_8_french.h
2b32b5c597c71c8b141668a1ab14b434 *src/stem_UTF_8_german.c
0f4cac0536d0f6cdd7b2e582e5ac33dd *src/stem_UTF_8_german.h
22a12029a7ea789ee4170ab596ee3f03 *src/stem_UTF_8_greek.c
ea580524124dd3ae77e68417fbee3723 *src/stem_UTF_8_greek.h
312f3185d4837bdb246503e909111f99 *src/stem_UTF_8_hindi.c
e9ab5fe76115b14e3a251d3c06ffe81c *src/stem_UTF_8_hindi.h
85ee76a5d5f91ce9c96dd3e8ae52288e *src/stem_UTF_8_hungarian.c
97a0d9193bef54e403784071964d924a *src/stem_UTF_8_hungarian.h
e49d051339f151bf3b7b10cbc1917ce8 *src/stem_UTF_8_indonesian.c
7bb3f1a5438ad8fd8c3ab354d397c40a *src/stem_UTF_8_indonesian.h
a3441812d039944ebfa1b8be2b9b29b2 *src/stem_UTF_8_irish.c
752c9cf7652b89feb273458920abc94a *src/stem_UTF_8_irish.h
0940415ef1f91a2d55f025e1d9d4df36 *src/stem_UTF_8_italian.c
c224a7446e7986fec86ed0ad7449e856 *src/stem_UTF_8_italian.h
31244e47a86893031925f1419e580d77 *src/stem_UTF_8_lithuanian.c
632acac6c0d7f10ef1e39a0a57b9d445 *src/stem_UTF_8_lithuanian.h
326362b4041651ab48cade91b4c2fb50 *src/stem_UTF_8_nepali.c
7a2ac6ace976b699a14e8fb9d5bf00a8 *src/stem_UTF_8_nepali.h
72bf7dead71e4c00481ee0bd92807d20 *src/stem_UTF_8_norwegian.c
08b6829e3354f59d6629b6bdd49ead1e *src/stem_UTF_8_norwegian.h
9be9e96031d19ae095d287600707c47a *src/stem_UTF_8_porter.c
42ea7000a622500a6ce68f09de87da4d *src/stem_UTF_8_porter.h
6015a36e930ff9cde2bf7c2c42ef90fb *src/stem_UTF_8_portuguese.c
4d7ed06b33625652a5228cb210930b5b *src/stem_UTF_8_portuguese.h
b8fce9a4018def8d3ec1a391a05c94ab *src/stem_UTF_8_romanian.c
717f5ec945418cd8ba6746bcab956f1c *src/stem_UTF_8_romanian.h
5db796a1a35e624105c81903ee90ebfd *src/stem_UTF_8_russian.c
9f360b012038fdbda068173dcfaba488 *src/stem_UTF_8_russian.h
2b70e877ee62a279813265e96be629b4 *src/stem_UTF_8_spanish.c
c2c303e630b5406e19454c6384c97cb2 *src/stem_UTF_8_spanish.h
91087c38378307455f8d449ae234f56d *src/stem_UTF_8_swedish.c
3ee381e1eb59aae7afe35096f5b21b0c *src/stem_UTF_8_swedish.h
3ac8cd386e94f3c842c39f508f7c9742 *src/stem_UTF_8_tamil.c
6ea3905ce441aaf3e404043de3aca905 *src/stem_UTF_8_tamil.h
7bc4e7d4635c856ca611c6776bdeef0e *src/stem_UTF_8_turkish.c
5cd0f8a6ac45238e47cc6dd815b59c52 *src/stem_UTF_8_turkish.h
cebf1e3b0913b6950638712a55a4fcd1 *src/utilities.c
5 changes: 5 additions & 0 deletions SnowballC/src/modules_utf8.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
#include "stem_UTF_8_arabic.h"
#include "stem_UTF_8_basque.h"
#include "stem_UTF_8_catalan.h"
#include "stem_UTF_8_czech.h"
#include "stem_UTF_8_danish.h"
#include "stem_UTF_8_dutch.h"
#include "stem_UTF_8_english.h"
Expand Down Expand Up @@ -66,6 +67,9 @@ static const struct stemmer_modules modules[] = {
{"ca", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"cat", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"catalan", ENC_UTF_8, catalan_UTF_8_create_env, catalan_UTF_8_close_env, catalan_UTF_8_stem},
{"cz", ENC_UTF_8, czech_UTF_8_create_env, czech_UTF_8_close_env, czech_UTF_8_stem},
{"cze", ENC_UTF_8, czech_UTF_8_create_env, czech_UTF_8_close_env, czech_UTF_8_stem},
{"czech", ENC_UTF_8, czech_UTF_8_create_env, czech_UTF_8_close_env, czech_UTF_8_stem},
{"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
{"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
{"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem},
Expand Down Expand Up @@ -147,6 +151,7 @@ static const char * algorithm_names[] = {
"arabic",
"basque",
"catalan",
"czech",
"danish",
"dutch",
"english",
Expand Down
Loading