-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsetup.py
49 lines (43 loc) · 1.77 KB
/
setup.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from TextUtilities.indexer import Indexer
import os
import argparse
from MainImplementation.GameSearcher import GameSearcher
from rich.console import Console
import nltk
from transformers import pipeline
'''
Download the NLTK corpora that the analyzer needs
'''
def downloadNLTKCorpus():
    """Fetch the NLTK resources this project relies on.

    Calls ``nltk.download`` once per resource, in a fixed order:
    ``punkt``, ``stopwords``, ``wordnet`` and
    ``averaged_perceptron_tagger``.
    """
    required_resources = (
        "punkt",
        "stopwords",
        "wordnet",
        "averaged_perceptron_tagger",
    )
    for resource_name in required_resources:
        nltk.download(resource_name)
def setup():
    """Parse the command-line options and build the base and sentiment indexes.

    Reads ``-t/--threads`` (integer, default 4) from the command line. When
    the value is outside 1-10 an error is logged to the console and the
    function returns without creating anything.

    Otherwise it instantiates the ``transformers`` text-classification
    pipeline, makes sure the ``indexdir`` directory exists, and calls
    ``Indexer.openIndex`` twice on the ``Dataset`` source: once targeting
    ``indexdir/base`` and once targeting ``indexdir/sentiment``.
    """
    # defining the parameters of the program
    parser = argparse.ArgumentParser(description="Create all indexes (base, sentiment)")
    parser.add_argument(
        "-t", "--threads",
        dest="nThreads",
        type=int,
        default=4,
        help="Number of threads used to create the indexs. Default number = 4.",
        # metavar is the placeholder name shown in usage/help text, so it
        # must be a string; an int here rendered as the misleading "-t 4".
        metavar="N",
    )
    console = Console()

    # arguments parsing
    args = parser.parse_args()
    if args.nThreads not in range(1, 11):
        console.log("[red] The number of threads must be in the following range: 1-10")
        return

    # download AI model used for sentiment analysis
    # NOTE(review): the returned pipeline is not used in this function;
    # presumably the call is here only so the model is fetched and cached
    # before indexing starts -- confirm against Indexer.
    pipeline("text-classification", model="j-hartmann/emotion-english-distilroberta-base", top_k=None)

    # create indexes
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir
    os.makedirs("indexdir", exist_ok=True)

    # The 4th positional argument differs between the two calls (False/True);
    # presumably it toggles sentiment indexing -- verify in Indexer.openIndex.
    with console.status("[bold green]Creating base version index..."):
        Indexer.openIndex("Dataset", "indexdir/base", console, False, args.nThreads)
    with console.status("[bold green]Creating sentiment versions index..."):
        Indexer.openIndex("Dataset", "indexdir/sentiment", console, True, args.nThreads)

    console.log("All indexes are complete")
if __name__ == "__main__":
    # Script entry point: the NLTK corpora are fetched first, then the
    # indexes are built.
    downloadNLTKCorpus()
    setup()