-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcdl_compress.py
64 lines (45 loc) · 1.61 KB
/
cdl_compress.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/env python
"""CDL encoding transformer fitter
CREATED:2013-05-08 16:15:55 by Brian McFee <[email protected]>
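Usage:
    ./cdl_compress.py n_jobs pca_model.pickle '/path/to/octarines/glob' [ext]

Given a fitted transformer model and a set of *-encoded.npy files, use the
transformer to compress them. Quote the glob so that the script, not the
shell, expands it.

Each output is saved alongside its input as *-encoded-<ext>.npy, where <ext>
defaults to 'compressed'.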
"""
import os
import sys
import glob
import cPickle as pickle
import numpy as np
from joblib import Parallel, delayed
RETAIN = 0.95
def vectorize(A):
    """Collapse every axis after the first, returning a 2-d array.

    The result has A.shape[0] rows; the remaining entries of each leading
    slice are laid out along the second axis.
    """
    n_rows = A.shape[0]
    squeezed = np.squeeze(A)
    return np.reshape(squeezed, (n_rows, -1))
def process_song(transformer, d, song, ext):
    """Compress one encoded song with the transformer and save the result.

    Arguments:
      transformer -- object whose transform(X) result can be indexed [:, :d]
      d           -- slice stop applied to the transformed columns; None
                     keeps every column
      song        -- path to a file whose basename contains '-encoded.npy'
      ext         -- tag used in the output name '<name>-encoded-<ext>.npy'

    The output is written into the same directory as `song`.

    Raises ValueError if the basename of `song` does not contain
    '-encoded.npy'.
    """
    songname = os.path.basename(song)
    songname = songname[:songname.index('-encoded.npy')]
    # Single-argument call form prints identically on Python 2 and Python 3.
    print(songname)

    A = vectorize(np.load(song))

    # Transform the data, project to top $RETAIN variance dimensions
    Ahat = transformer.transform(A)[:, :d]

    # os.path.join copes with an empty dirname (song given as a bare file
    # name); plain '%s/%s' formatting would point at the filesystem root.
    outname = os.path.join(os.path.dirname(song),
                           '%s-encoded-%s.npy' % (songname, ext))
    np.save(outname, Ahat)
def process_data(n_jobs, transformer, file_glob, ext):
    """Compress every file matching `file_glob`, in parallel.

    Arguments:
      n_jobs      -- number of joblib workers
      transformer -- fitted model passed through to process_song; if it has
                     an explained_variance_ratio_ attribute, the output is
                     truncated to the leading components that together
                     explain at least RETAIN of the variance
      file_glob   -- glob pattern selecting the input files
      ext         -- tag forwarded to process_song for the output file name
    """
    files = sorted(glob.glob(file_glob))

    # None as a slice stop keeps every column.  (A stop of -1 would silently
    # drop the last column.)
    d = None
    if hasattr(transformer, 'explained_variance_ratio_'):
        reached = np.cumsum(transformer.explained_variance_ratio_) >= RETAIN
        if reached.any():
            # argmax gives the index of the first component at which the
            # cumulative ratio reaches RETAIN; that component must itself be
            # kept, so the number of columns is index + 1.
            d = int(np.argmax(reached)) + 1
        # Otherwise the threshold is never reached (argmax would return 0),
        # so keep all available components.

    Parallel(n_jobs=n_jobs)(
        delayed(process_song)(transformer, d, song, ext) for song in files)
if __name__ == '__main__':
    # Command line: n_jobs model.pickle file_glob [ext]
    if len(sys.argv) < 4:
        sys.exit('Usage: %s n_jobs model.pickle file_glob [ext]' % sys.argv[0])

    n_jobs = int(sys.argv[1])

    # Pickles are binary data: open in 'rb' so the stream is not mangled by
    # text-mode newline translation.
    # NOTE(security): unpickling can execute arbitrary code; only load model
    # files from a trusted source.
    with open(sys.argv[2], 'rb') as f:
        transformer = pickle.load(f)

    file_glob = sys.argv[3]

    # Optional fourth argument overrides the output tag.
    ext = sys.argv[4] if len(sys.argv) > 4 else 'compressed'

    process_data(n_jobs, transformer, file_glob, ext)