From 0a5437acdde8aa6a4ed0f00e88213d349fefa5f5 Mon Sep 17 00:00:00 2001 From: ivan Date: Fri, 10 Nov 2017 07:46:53 +0500 Subject: [PATCH] add wikipedia --- list.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/list.json b/list.json index 6cda86e..e7b61a7 100644 --- a/list.json +++ b/list.json @@ -1,5 +1,15 @@ { "corpora": { + "wiki-en": { + "description": "Extracted Wikipedia dump from October 2017. Produced by `python -m gensim.scripts.segment_wiki -f enwiki-20171001-pages-articles.xml.bz2 -o wiki-en.gz`", + "checksum-0": "a7d7d7fd41ea7e2d7fa32ec1bb640d71", + "checksum-1": "b2683e3356ffbca3b6c2dca6e9801f9f", + "checksum-2": "c5cde2a9ae77b3c4ebce804f6df542c2", + "checksum-3": "00b71144ed5e3aeeb885de84f7452b81", + "file_name": "wiki-en.gz", + "source": "https://dumps.wikimedia.org/enwiki/20171001/", + "parts": 4 + }, "text8": { "description": "Cleaned small sample from wikipedia", "checksum": "68799af40b6bda07dfa47a32612e5364",