-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathmain.cpp
47 lines (38 loc) · 1.33 KB
/
main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include "Vocabulary.hpp"
#include "SkipGram.hpp"
#include "Utils.hpp"
#include <iostream>
int main(int argc, char** argv){
int dim = 100;
int windowSize = 1;
int numNegative = 15;
Real learningRate = 0.025;
int wordFreqThreshold = 10;
int nGramFreqThreshold = 10;
std::string trainFile = "./sample_data/sample.txt";
std::string outputFile = "./result";
Utils::procArg(argc, argv,
dim, windowSize, numNegative, learningRate,
wordFreqThreshold, nGramFreqThreshold,
trainFile, outputFile);
printf("### Settings ###\n");
printf("-edim %d\n", dim);
printf("-window %d\n", windowSize);
printf("-neg %d\n", numNegative);
printf("-lr %f\n", learningRate);
printf("-wminfreq %d\n", wordFreqThreshold);
printf("-cminfreq %d\n", nGramFreqThreshold);
printf("-train %s\n", trainFile.c_str());
printf("-output %s\n", outputFile.c_str());
puts("");
Vocabulary voc;
SkipGram sg(voc);
voc = Vocabulary(trainFile, wordFreqThreshold, nGramFreqThreshold);
std::cout << "Word vocabulary size:\t" << voc.tokenListCount.size() << std::endl;
std::cout << "Ngram Vocabulary size:\t" << voc.ngramListCount.size() << std::endl;
sg.init(dim, windowSize, numNegative, trainFile, true);
sg.train(learningRate);
std::cout << "Done!" << std::endl;
sg.save(outputFile);
return 0;
}