-
Notifications
You must be signed in to change notification settings - Fork 6
/
attribute_datasets.py
248 lines (227 loc) · 10.2 KB
/
attribute_datasets.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
"""
Module provides iterator functions for the PARSE-27k dataset.
"""
import numpy as np
import os, re
from itertools import izip
from collections import OrderedDict
import sqlite3
class PersonExample(object):
    """Manage one annotated pedestrian example.

    - All labels are in 'softmax' notation, i.e. (0..N-1) for N classes,
      where 0 is the NA label.
    - This class provides the functionality to 'mirror' labels, i.e. to
      return the labels that belong to the horizontally flipped example.

    Constructor arguments:
    - attributes      sequence of attribute names, one per label
    - valid_values    number of classes (including NA) per attribute
    - pid             identifier of the pedestrian
    - image_filename  image the example was cropped from
    - box             bounding box of the pedestrian in that image
    - labels          softmax labels, in the same order as `attributes`
    """

    # Left/right attribute pairs: the mirrored value of one is the
    # un-mirrored value of its counterpart.
    _BINARY_MIRROR_ATTRIBUTES = {
        'HasBagInHandLeft': 'HasBagInHandRight',
        'HasBagInHandRight': 'HasBagInHandLeft',
        'HasBagOnShoulderLeft': 'HasBagOnShoulderRight',
        'HasBagOnShoulderRight': 'HasBagOnShoulderLeft',
    }

    # 4-way orientation in softmax notation:
    #   0 na, 1 front, 2 back, 3 left, 4 right
    # mirroring permutation: (0)(1)(2)(3,4)
    _ORIENTATION4_MIRROR = {0: 0, 1: 1, 2: 2, 3: 4, 4: 3}

    # 8-way orientation in softmax notation:
    #   0 na, 1 front, 2 front-right, 3 right, 4 back-right,
    #   5 back, 6 back-left, 7 left, 8 front-left
    # mirroring permutation: (0)(1)(2,8)(3,7)(4,6)(5)
    _ORIENTATION8_MIRROR = {0: 0, 1: 1, 5: 5,
                            2: 8, 8: 2,
                            3: 7, 7: 3,
                            4: 6, 6: 4}

    def __init__(self, attributes, valid_values,
                 pid, image_filename, box, labels):
        self.attributes = attributes
        self.valid_values = valid_values
        self.pid = pid
        self.image_filename = image_filename
        self.box = box
        # Ordered, so that values() follows the order of `attributes`.
        self._labels = OrderedDict(zip(self.attributes, labels))

    def label(self, attr_idx, mirrored=False):
        """Return the label at position `attr_idx` (index into `attributes`)."""
        return self.labels(mirrored=mirrored)[attr_idx]

    def labels(self, mirrored=False):
        """Return all labels as a list, in the order of `attributes`.

        If `mirrored` is True the labels of the horizontally flipped example
        are returned: left/right orientations are swapped and left/right
        attribute pairs exchange their values.

        Raises ValueError for 'Orientation12' (mirroring not supported) and
        for orientation values outside the known range. Raises KeyError if a
        left/right attribute is mirrored while its counterpart is not part
        of this example's attributes.
        """
        if not mirrored:
            # list(...) so the result is indexable on Python 3 as well,
            # where dict.values() is only a view.
            return list(self._labels.values())

        labels_out = []
        for attr, value in self._labels.items():
            if attr == 'Orientation12':
                raise ValueError('we cannot mirror Ori12 yet')
            elif attr in ('Orientation', 'Orientation4'):
                # The 4-way orientation attribute is called 'Orientation' in
                # the annotation database; 'Orientation4' is kept as an alias.
                try:
                    out = self._ORIENTATION4_MIRROR[value]
                except KeyError:
                    msg = 'invalid Orientation label in softmax notation: {}'
                    raise ValueError(msg.format(value))
                labels_out.append(out)
            elif attr == 'Orientation8':
                try:
                    out = self._ORIENTATION8_MIRROR[value]
                except KeyError:
                    msg = 'invalid Orientation8 label in softmax notation: {}'
                    raise ValueError(msg.format(value))
                labels_out.append(out)
            elif attr in self._BINARY_MIRROR_ATTRIBUTES:
                counterpart = self._BINARY_MIRROR_ATTRIBUTES[attr]
                labels_out.append(self._labels[counterpart])
            else:
                labels_out.append(value)  # no change for mirrored example
        return labels_out

    def _one_hot(self, idx, max_n):
        """Return a float vector of length `max_n` with a 1 at `idx`."""
        z = np.zeros(max_n)
        z[idx] = 1
        return z

    def label_one_hot(self, attr_idx=None, mirrored=False):
        """Return the one-hot encoding of the label at `attr_idx`.

        `attr_idx` has to be given; the None default cannot be used as an
        index and results in a TypeError.
        """
        label = self.label(attr_idx=attr_idx, mirrored=mirrored)
        return self._one_hot(label, self.valid_values[attr_idx])

    def labels_one_hot(self, attr_idx=None, mirrored=False):
        """Return a list with the one-hot encoding of every label.

        `attr_idx` is accepted for signature compatibility but is not used.
        """
        labels = self.labels(mirrored)
        return [self._one_hot(l, self.valid_values[idx])
                for idx, l in enumerate(labels)]
class DataPreprocessorPARSE(object):
    """Read the PARSE-27k annotations from the sqlite3 database and provide
    the selected examples as a list of PersonExample objects.
    """

    # Attribute names; the database column of an attribute is <name> + 'ID'.
    ALL_ATTRIBUTES = ('Orientation',
                      'Orientation8',
                      'Gender',
                      'Posture',
                      'HasBagOnShoulderLeft', 'HasBagOnShoulderRight',
                      'HasBagInHandLeft', 'HasBagInHandRight',
                      'HasTrolley',
                      'HasBackpack',
                      'isPushing',
                      'isTalkingOnPhone')

    # Sequence IDs that make up each split.
    TRAIN_SEQUENCES = (1, 4, 5)
    VAL_SEQUENCES = (2, 7, 8)
    TEST_SEQUENCES = (3, 6)
    TRAINVAL_SEQUENCES = (1, 4, 5, 2, 7, 8)
    ALL_SEQUENCES = (1, 2, 3, 4, 5, 6, 7, 8)

    def __init__(self, pathToDataset,
                 fnAnnoDB='annotations.sqlite3',
                 attributes='all',
                 split='train',
                 exclude_sitting=True,
                 verbose=False):
        """reads annotation from sqlite3 db at fnAnno, and assumes images to be
        at pathToDataset
        - pathToDataset   path to sequence directories containing the images
        - fnAnnoDB        sqlite3 annotation database
        - attributes      'all', or a non-empty list/tuple of attribute names
        - split           which examples to iterate over
                          (train, val, test, trainval, all)
        - exclude_sitting - if True treat Posture as binary (NA, walking, standing)
            (there are only very few sitting examples, and we did not use these in our work)
        - verbose - show SQL query

        Raises ValueError for an invalid attribute selection or split.
        """
        # Explicit assignments instead of self.__dict__.update(locals()),
        # which also stored a reference to self on the instance.
        self.pathToDataset = pathToDataset
        self.fnAnnoDB = fnAnnoDB
        self.split = split
        self.exclude_sitting = exclude_sitting
        self.verbose = verbose
        self.pathToSequences = os.path.join(pathToDataset, 'sequences')

        # max N per item in ALL_ATTRIBUTES; Posture has one class less when
        # the 'sitting' examples are excluded.
        posture_classes = 3 if exclude_sitting else 4
        self.VALID_VALUES = [5, 9, 3, posture_classes, 3, 3, 3, 3, 3, 3, 3, 3]

        if isinstance(attributes, str) and attributes.lower() == 'all':
            self.attributes = self.ALL_ATTRIBUTES
            self.valid_values = self.VALID_VALUES
        elif isinstance(attributes, (list, tuple)) and len(attributes) > 0:
            unknown = [a for a in attributes if a not in self.ALL_ATTRIBUTES]
            if unknown:
                # The names end up as column names in the SQL query, so only
                # known attributes may pass.
                msg = 'invalid attribute selection: {}'
                raise ValueError(msg.format(unknown))
            self.attributes = list(attributes)
            self.valid_values = [self.VALID_VALUES[self.ALL_ATTRIBUTES.index(a)]
                                 for a in self.attributes]
        else:
            raise ValueError('invalid attribute selection')

        sequences_by_split = {'train': self.TRAIN_SEQUENCES,
                              'val': self.VAL_SEQUENCES,
                              'test': self.TEST_SEQUENCES,
                              'trainval': self.TRAINVAL_SEQUENCES,
                              'all': self.ALL_SEQUENCES}
        try:
            self.sequenceIDs = sequences_by_split[split]
        except (KeyError, TypeError):  # unknown or unhashable split value
            raise ValueError('invalid split')

        self.examples = self._read_examples_from_db()

    def _read_examples_from_db(self):
        """Query the annotation database and return a list of PersonExample.

        Selects the pedestrians of the sequences in self.sequenceIDs
        (without the 'sitting' examples if self.exclude_sitting is set) and
        translates their labels to softmax notation.
        """
        self.dbFile = os.path.join(self.pathToDataset, self.fnAnnoDB)
        try:
            self.db = sqlite3.connect(self.dbFile)
        except sqlite3.Error as e:
            raise Exception(e)

        columns = ', '.join(a + 'ID' for a in self.attributes)
        query = '''
            SELECT s.directory as directory,
                   i.filename as filename,
                   p.pedestrianID as pid,
                   p.box_min_x as min_x, p.box_min_y as min_y,
                   p.box_max_x as max_x, p.box_max_y as max_y,
                   {0}
            FROM Pedestrian p
            INNER JOIN AttributeSet a ON p.attributeSetID = a.attributeSetID
            INNER JOIN Image i ON p.imageID = i.imageID
            INNER JOIN Sequence s on s.sequenceID = i.sequenceID
            '''.format(columns)

        conditions = []
        if self.exclude_sitting:
            conditions.append('a.postureID <> 4')  # filter out all 'sitting' examples
        if self.sequenceIDs:
            # Build the list explicitly: str() of a one-element tuple, '(1,)',
            # is not valid SQL.
            ids = ', '.join(str(int(s)) for s in self.sequenceIDs)
            conditions.append('i.sequenceID IN ({0})'.format(ids))
        if conditions:
            query += ' WHERE ' + ' AND '.join(conditions)

        if self.verbose:
            print(query)

        try:
            self.dbc = self.db.cursor()
            results = self.dbc.execute(query).fetchall()
        finally:
            # Close the connection even if the query fails.
            self.db.close()

        examples = []
        for row in results:
            # rows are: (directory, filename, pedestrianID,
            #            min_x, min_y, max_x, max_y, ... all the attributes)
            fullFileName = os.path.join(self.pathToSequences, row[0], row[1])
            if not isinstance(fullFileName, str):
                # Python 2: sqlite3 returns unicode, hand out a byte string.
                # On Python 3 the path already is a native str.
                fullFileName = fullFileName.encode('utf-8')
            box = tuple(row[3:7])
            pid = str(row[2])
            labels = self._translate_db_labels_to_softmax(row[7:])
            examples.append(PersonExample(self.attributes, self.valid_values,
                                          pid, fullFileName, box, labels))
        return examples

    def _translate_db_label_to_softmax(self, attribute, label):
        """
        translates a label from the sqlite database to a softmax label.
        The softmax range is (0,1,...,N) - where 0 is the N/A label.
        (0=NA, 1=POS, 2=NEG)
        (0=NA, 1=front, 2=back, 3=left, 4=right)

        Raises TypeError if `label` is not an integer and ValueError if
        `attribute` is not selected or the label is unexpected.
        """
        msg = 'unexpected label - attribute: {} - value: {}'
        if not isinstance(label, int):
            raise TypeError('label expected to be integer')
        if attribute not in self.attributes:
            raise ValueError('invalid attribute')

        # translate to range [0,1,..N]
        # by convention we handled the male as the 'pos' label
        # this can have an influence on the exact value of AP scores
        if attribute == 'Posture' and self.exclude_sitting:
            # Posture handled as binary (the default)
            if label == 3:    # standing -> pos (less frequent)
                return 1
            if label == 2:    # walking -> neg (the more frequent class)
                return 2
            if label == 1:    # -> NA
                return 0
            raise ValueError(msg.format(attribute, label))
        # every other case: database labels start at 1, softmax labels at 0
        return label - 1

    def _translate_db_labels_to_softmax(self, labels):
        """
        applies translation all attributes
        - should be useful when we support working only on a subset of
          attributes

        `labels` has to contain one database label per selected attribute,
        in the order of self.attributes; otherwise ValueError is raised.
        """
        if len(self.attributes) != len(labels):
            msg = 'length of labels does not match my attribute count!'
            raise ValueError(msg)
        # builtin zip works on Python 2 and 3 (itertools.izip is Python 2 only)
        return [self._translate_db_label_to_softmax(a, l)
                for (a, l) in zip(self.attributes, labels)]

    @property
    def all_examples(self):
        """A copy of the internal list of examples."""
        return list(self.examples)

    def __iter__(self):
        """iterate over all annotation examples
        """
        for example in self.examples:
            yield example