-
Notifications
You must be signed in to change notification settings - Fork 0
/
run_pca.py
24 lines (18 loc) · 867 Bytes
/
run_pca.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from main.pca_analysis import PCA_analysis
from main.data_loader import DataLoader
# Driver script: build a DataLoader over the SemEval-2016 Task 3 (subtask A)
# XML files, hand it to PCA_analysis, and run one of the predefined scenarios.
data_loader = DataLoader(
    'data/SemEval2016-Task3-CQA-QL-train-part1-subtaskA.xml',
    'data/SemEval2016-Task3-CQA-QL-train-part2-subtaskA.xml',
    'data/SemEval2016-Task3-CQA-QL-dev-subtaskA.xml',
    'data/test_input.xml',
)
pca_a = PCA_analysis(data_loader)

# Select which scenario to execute:
#   1 - 2-component PCA followed by a plot of the first 100 reduced samples
#   2 - PCA with 16 components
#   3 - test scenario: PCA with 18 components and test=True
scenario = 3

# The scenarios are mutually exclusive, so they are dispatched through a single
# if/elif chain; an unsupported value fails loudly instead of doing nothing.
if scenario == 1:
    # Run PCA for visualization
    X_r, idx_r, X_v, idx_v = pca_a.run(n_components=2, downsampling=None)
    # Only the first 100 rows/indices are passed on to the plot.
    pca_a.visualize_pca_questions(X_r=X_r[:100], idx=idx_r[:100])
elif scenario == 2:
    # Run PCA with higher number of components
    X_r, idx_r, X_v, idx_v = pca_a.run(n_components=16, downsampling=None)
elif scenario == 3:
    # TEST SCENARIO
    # NOTE(review): presumably test=True makes run() use the test input file
    # passed to DataLoader above -- confirm against PCA_analysis.run.
    X_r, idx_r, X_v, idx_v = pca_a.run(n_components=18, downsampling=None, test=True)
else:
    raise ValueError(f"Unsupported scenario: {scenario!r} (expected 1, 2 or 3)")