-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdata_utils.py
145 lines (107 loc) · 3.35 KB
/
data_utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from itertools import combinations
def load_project_dataset(fname, typelabel, n_features=4, n_points=1007):
    """Load a multivariate time-series dataset from a CSV file.

    The CSV must provide the columns ``ID_TimeSeries``, ``Values`` and the
    column named by ``typelabel``. Rows are consumed in file order in groups
    of ``n_features`` consecutive rows: the k-th row of a group fills feature
    k of the current series. Each ``Values`` cell is a ``;``-separated string
    of numbers, of which the first ``n_points`` are used.

    NOTE(review): the ``ID_TimeSeries`` column is only used to size the
    output (``max + 1``); rows are assigned to series by their position, so
    this assumes the file is ordered by series with 0-based contiguous IDs --
    confirm against the data producer.

    Args:
        fname: Path or file-like object accepted by ``pandas.read_csv``.
        typelabel: Name of the column holding the label of each row.
        n_features: Number of consecutive rows (features) per series.
        n_points: Number of values kept from each ``Values`` cell.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float array of shape
        ``(N, n_features, 1, n_points)`` with ``N = max(ID_TimeSeries) + 1``;
        ``labels`` is a 1-D array with the label found on the first row of
        each group.
    """
    df = pd.read_csv(fname)
    n_series = int(df['ID_TimeSeries'].max()) + 1
    data = np.zeros((n_series, n_features, 1, n_points))

    # Positional view of the label column, so the lookup does not depend on
    # the DataFrame index labels.
    label_column = df[typelabel].to_numpy()
    labels = []

    for row, raw_values in enumerate(df['Values']):
        # Row r belongs to series r // n_features, feature r % n_features.
        series, feature = divmod(row, n_features)
        values = raw_values.split(';')[:n_points]
        data[series][feature] = np.asarray([float(value) for value in values])
        if feature == 0:
            # One label per series, read from the first row of its group.
            labels.append(label_column[row])

    return data, np.asarray(labels)
def get_data(df, n_features=4, n_points=1007):
    """Convert a DataFrame of per-row time series into a dense array.

    For every row, the series is read from the column labelled ``0`` when
    such a label exists and from the first column otherwise; the label is
    read from the ``Class`` column. The series is indexed as
    ``series[step][feature]`` for the first ``n_points`` steps and the first
    ``n_features`` features, and every value is converted with ``float``.

    Args:
        df: DataFrame with one sample per row and a ``Class`` column.
        n_features: Number of features read from every time step.
        n_points: Number of time steps read from every series.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a float array of shape
        ``(len(df), n_features, 1, n_points)``; ``labels`` is a 1-D array of
        the ``Class`` values in row order.
    """
    n_samples = df.shape[0]
    data = np.zeros((n_samples, n_features, 1, n_points))
    labels = []

    for position, (_, row) in enumerate(df.iterrows()):
        # Integer keys on a label-indexed row used to fall back to positional
        # access; that fallback is deprecated in pandas, so spell out both
        # cases: a real label 0 wins, otherwise take the first column.
        series = row.loc[0] if 0 in row.index else row.iloc[0]

        # Transpose (step, feature) -> (feature, step) while copying.
        for feature in range(n_features):
            data[position][feature] = np.asarray(
                [float(series[step][feature]) for step in range(n_points)]
            )
        labels.append(row['Class'])

    return data, np.asarray(labels)
def visualize_timeseries(dataset, n):
    """Print and plot the first ``n`` samples of ``dataset``.

    Each sample is read as ``dataset[i]`` and must be indexable as
    ``(series, label)``. For every sample the series shape, the series and
    the label are printed, then the last row of each of the first four
    entries of the series is drawn in a 2x2 grid of subplots and the figure
    is shown with ``plt.show()``.

    Args:
        dataset: Object supporting ``len()`` and integer indexing.
        n: Maximum number of samples to display. Values larger than
            ``len(dataset)`` show every sample; ``n <= 0`` shows nothing.
    """
    # (title, line format) for each subplot, in row-major grid order. The
    # last panel keeps matplotlib's default line style.
    panels = (
        ("1 dimension", 'tab:orange'),
        ("2 dimension", 'tab:green'),
        ("3 dimension", 'tab:red'),
        ("4 dimension", None),
    )

    for i in range(min(n, len(dataset))):
        sample = dataset[i]
        series = sample[0]
        print(i, series.shape)
        print(series)
        print("label:", sample[1])

        fig, axs = plt.subplots(2, 2)
        fig.set_size_inches(14, 7)
        for feature, (ax, (title, line_format)) in enumerate(zip(axs.flat, panels)):
            values = series[feature][-1]
            # Derive the x axis from the data so series of any length plot.
            steps = np.arange(len(values))
            ax.set_title(title)
            if line_format is None:
                ax.plot(steps, values)
            else:
                ax.plot(steps, values, line_format)
        fig.tight_layout()
        plt.show()
class TripletSelector:
    """
    Implementation should return indices of anchors, positive and negative samples
    return np array of shape [N_triplets x 3]
    """

    def __init__(self):
        """Create the selector; the base class keeps no state."""

    def get_triplets(self, embeddings, labels):
        """Select triplets for a batch; must be overridden by subclasses.

        Args:
            embeddings: Embeddings of the batch samples.
            labels: Labels of the batch samples.

        Raises:
            NotImplementedError: Always, in this base class.
        """
        raise NotImplementedError(
            f"{type(self).__name__} must implement get_triplets()"
        )
class AllTripletSelector(TripletSelector):
    """
    Returns all possible triplets
    May be impractical in most cases
    """

    def __init__(self):
        super(AllTripletSelector, self).__init__()

    def get_triplets(self, embeddings, labels):
        """Enumerate every (anchor, positive, negative) index triplet.

        For each distinct label with at least two samples, every unordered
        pair of same-label indices is combined with every index carrying a
        different label.

        Args:
            embeddings: Unused; present to match the selector interface.
            labels: Tensor of labels; it is moved to the CPU and converted
                to a NumPy array before use.

        Returns:
            An int64 tensor of shape ``(N_triplets, 3)`` whose columns are
            the anchor, positive and negative indices. When no triplet can
            be formed the result has shape ``(0, 3)``.
        """
        labels = labels.detach().cpu().numpy()
        triplets = []

        # np.unique gives a deterministic (sorted) label order.
        for label in np.unique(labels):
            label_mask = labels == label
            label_indices = np.where(label_mask)[0]
            if len(label_indices) < 2:
                # A positive pair needs two samples of the same label.
                continue
            negative_indices = np.where(~label_mask)[0]

            # Add all negatives for all anchor-positive pairs.
            triplets.extend(
                (anchor, positive, negative)
                for anchor, positive in combinations(label_indices, 2)
                for negative in negative_indices
            )

        # Force dtype and shape so an empty selection is still (0, 3) int64
        # instead of an empty float array.
        triplet_array = np.array(triplets, dtype=np.int64).reshape(-1, 3)
        return torch.from_numpy(triplet_array)