From 78ab3c22e1f5939e94b007a78ce16eb62c0b213d Mon Sep 17 00:00:00 2001
From: wangchaoqun56
Date: Thu, 18 Apr 2019 01:15:11 +0800
Subject: [PATCH] init

---
 .gitignore                |    1 +
 readme.md                 |   24 +
 requirements.txt          |    4 +
 tracking/Logger.py        |   26 +
 tracking/configer.py      |   79 +++
 tracking/funs_tracking.py | 1077 +++++++++++++++++++++++++++++++++++++
 tracking/tf_utis.py       |   70 +++
 tracking/tracker.py       |  289 ++++++++++
 tracking/utis.py          |  798 +++++++++++++++++++++++++++
 tracking/vgg19_tf.py      |  177 ++++++
 tracking/vgg_utis.py      |  188 +++++++
 11 files changed, 2733 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 readme.md
 create mode 100644 requirements.txt
 create mode 100644 tracking/Logger.py
 create mode 100644 tracking/configer.py
 create mode 100644 tracking/funs_tracking.py
 create mode 100644 tracking/tf_utis.py
 create mode 100644 tracking/tracker.py
 create mode 100644 tracking/utis.py
 create mode 100644 tracking/vgg19_tf.py
 create mode 100644 tracking/vgg_utis.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..93e173c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+*.bmp
diff --git a/readme.md b/readme.md
new file mode 100644
index 0000000..1b71418
--- /dev/null
+++ b/readme.md
@@ -0,0 +1,24 @@
+# VGG_CF
+Visual object tracking based on a correlation filter over VGG features. Runs at about 2 s per frame; this can be made faster by optimizing the code.
+Precision rate: 86.3%, success rate: 60.3%
+
+# Dependencies
+CUDA 8.0
+cuDNN 6.0
+python==3.6
+GPU with ~4.5 GB of memory
+
+# Tracking
+1. git clone https://github.com/wangchaoqun56/VGG_CF.git
+2. pip install -r requirements.txt
+3. cd tracking
+   edit configer.py:
+   data_path: path to the dataset
+   vgg_model_path: path to the VGG19 model pretrained on ImageNet
+4. python tracker.py -s 0 -e 100
+   --start: index of the first sequence
+   --end: index of the last sequence
+   --gpu: GPU id, default '0'
+
+# Other
+Download vgg19.npy from [vgg19.npy](https://mega.nz/#!xZ8glS6J!MAnE91ND_WyfZ_8mvkuSa2YcA7q-1ehfSm-Q1fxOvvs)
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1e65390
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,4 @@
+tensorflow-gpu==1.2.1
+scipy==1.2.1
+pillow==6.0.0
+scikit_image==0.15.0
\ No newline at end of file
diff --git a/tracking/Logger.py b/tracking/Logger.py
new file mode 100644
index 0000000..9192242
--- /dev/null
+++ b/tracking/Logger.py
@@ -0,0 +1,26 @@
+# Logger.py
+
+import logging
+
+class Logger():
+    def __init__(self, logname, loglevel, logger):
+
+        self.logger = logging.getLogger(logger)
+        self.logger.setLevel(logging.DEBUG)
+
+        fh = logging.FileHandler(logname)
+        fh.setLevel(logging.DEBUG)
+
+        ch = logging.StreamHandler()
+        ch.setLevel(logging.DEBUG)
+
+        formatter = logging.Formatter('%(asctime)s-%(name)s-%(levelname)s-%(message)s')
+        #formatter = format_dict[int(loglevel)]
+        fh.setFormatter(formatter)
+        ch.setFormatter(formatter)
+
+        self.logger.addHandler(fh)
+        self.logger.addHandler(ch)
+
+    def getlog(self):
+        return self.logger
diff --git a/tracking/configer.py b/tracking/configer.py
new file mode 100644
index 0000000..dd98e5d
--- /dev/null
+++ b/tracking/configer.py
@@ -0,0 +1,79 @@
+import os
+import time
+
+import numpy as np
+import tensorflow as tf
+
+import vgg19_tf
+from funs_tracking import *
+from Logger import *
+from vgg_utis import vgg_process_images, vgg_resize_maps
+
+#########################
+##### gpu parameter #####
+#########################
+gpu_id = '/gpu:0'
+config = tf.ConfigProto(allow_soft_placement=True)
+config.gpu_options.allow_growth = True
+
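+# Note: `config` above is the ConfigProto handed to each tf.Session created
+# in tracker.py, e.g. sess = tf.Session(config=config);
+# allow_soft_placement lets ops without a GPU kernel fall back to the CPU,
+# and allow_growth keeps TensorFlow from reserving all GPU memory up front.
+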
+#########################
+#### data params ########
+#########################
+
+# data_path = r'/data/cyy/Data/CVPR2013Bechmark_Color/'
+# cache_path = r'/data/cyy/Results/vgg_cf_all_scale'
+data_path = r'/data3/TB-100/OTB2015'
+cache_path = r'./Results'
+if not os.path.isdir(cache_path):
+    os.mkdir(cache_path)
+pstr = 'gcnn'
+
+####
+padding = {'large': 1, 'height': 0.4, 'generic': 2} # 25~50: 2.5 others 2.2
+cell_size0 = 4
+batch_size = 1 # fixed
+max_win2 = 1600
+min_win2 = 1600
+fea_sz = np.asarray([57, 57])
+#########################
+####### VGG Model #######
+#########################
+
+vgg_model_path = '/data2/wangchaoqun/model/vgg19.npy'
+vgg_batch_size = 1
+vgg_out_layers = np.asarray((10, 11, 12, 14, 15, 16))
+
+
+vgg_is_lrn = False
+
+# image processing params for vgg
+img_param = {}
+img_param['interp_tool'] = 'misc' # misc or skimage
+img_param['interp'] = 'bilinear'
+img_param['normal_hw'] = (224, 224)
+img_param['normal_type'] = 'keep_all_content'
+
+##################################
+###### graph parameters ##########
+##################################
+
+gh1 = {'height_width': None, 'number_edges': 4, 'metric': 'euclidean', 'normalized_laplacian': True}
+
+# pca params
+pca_is_mean = True
+pca_is_norm = False
+pca_energy = 100
+####
+nn_p = 6 #
+nn_K = 20
+nn_gamma = 1.0
+
+####################### cf params ###############################
+search_scale = fun_get_search_scale()
+
+kernel_sigma = 0.5
+kernel_type = 'linear'
+kernel_gamma = 1 # 1.e-6
+update_factor = 0.0075 # Jogging learning 0.005, others 0.015
+cf_nframe_update = 1
+weight_update_factor = 0.01
diff --git a/tracking/funs_tracking.py b/tracking/funs_tracking.py
new file mode 100644
index 0000000..0f1757d
--- /dev/null
+++ b/tracking/funs_tracking.py
@@ -0,0 +1,1077 @@
+'''
+funs_tracking.py
+'''
+
+import numpy as np
+import scipy.misc as MISC
+#from theano.tensor.signal.feaprojecting import *
+#from theano.tensor.signal.feapoolingRT import *
+from PIL import Image, ImageFont, ImageDraw
+#from fhog import *
+import subprocess
+import scipy.ndimage as ndimage
+from utis import *
+
+from skimage.morphology import erosion, dilation, opening, closing, white_tophat
+#from skimage.morphology import black_tophat, skeletonize, convex_hull_image
+from skimage.morphology import disk
+from scipy import interpolate
+import cv2  # used by fun_process_binary_image (cv2.threshold / cv2.findContours)
+from scipy.spatial.distance import cdist
+# import time
+'''
+close_logger(logger)
+fun_correlation_singlesample(xf,yf,kernel_type,sigma)
+fun_cos_win(h,w,shp)
+get_ctr_shift_trackers(responses,weight_trackers)
+get_weights_bmap(batch_bmap_y)
+fun_binarize_map(prop)
+fun_get_binary_map(win_size,rct_size,cell_size)
+fun_get_freq_fea(x,is_cos_window)
+fun_get_max_response(x1,x2,x3,x4)
+fun_get_patch(x,ctr,sz)
+fun_get_patch_warping(img,ctr,tmp_sz,win_sz)
+fun_get_peak_map(win_size,rct_size,cell_size,fea_sz,isroll)
+fun_get_search_window(target_sz,im_sz,padding)
+fun_get_search_ctr(rct,factor=1.)
+fun_get_strnn_list(srnn_directions) +fun_kernel_multisample(xf,yf,kernel_type,sigma) +fun_draw_rct_on_image(im,fname,rct1,rct2,rct3) +fun_process_binary_image(x) +fun_response(model_w,xf,zf,kernel_type,sigma,gamma) +fun_shift_feas(x,px,directions,step,outx) +fun_vggfea_list2array(xlist,nmh,nmw,interp_type,flag,outx) +fun_w(xf,yf,kernel_type,sigma,gamma) +''' + + +def fea_pca_tr(x, p, energy, is_mean=True, is_norm=True): + # x: h*w*d + h, w, d = x.shape + assert(d % p == 0) + d2 = np.int32(d / p) + ss = 0 + projections = [] + for ii in range(p): + z = np.reshape(x[:, :, ss:ss + d2], (h * w, d2)) + ss = ss + d2 + proj = pca_tr(z, energy, is_mean, is_norm) + projections.append(proj) + + return projections + + +def fea_pca_te(x, p, projections): + # x: h*w*d + h, w, d = x.shape + assert(d % p == 0) + d2 = np.int32(d / p) + ss = 0 + o = [] + for ii in range(p): + z = np.reshape(x[:, :, ss:ss + d2], (h * w, d2)) + ss = ss + d2 + z = pca_te(z, projections[ii]) + # z = np.expand_dims(z,axis = 0) + o.append(z) + + # return np.concatenate(o, axis=0) + return o + # return np.reshape(np.concatenate(o, axis = -1),(h,w,-1)) + + +def compute_hw(mx_hh0, mx_ww0, mx_res0): + msk_idx = np.logical_not(np.isnan(mx_res0)) + mx_hh = np.copy(mx_hh0) + mx_ww = np.copy(mx_ww0) + #assert len(msk_idx) == 6 + n = len(msk_idx) + n2 = np.int32(n / 2) + idx = np.where(msk_idx < 0.5) + mx_hh[idx] = 0 + mx_ww[idx] = 0 + mx_res0[idx] = 0 + + sgn_h = np.sum(np.sign(mx_hh[n2:])) + if sgn_h == n - n2: + idx = np.where(mx_hh[0:n2] < 0) + mx_hh[idx] = 0 + elif sgn_h == n2 - n: + idx = np.where(mx_hh[0:n2] > 0) + mx_hh[idx] = 0 + + sgn_w = np.sum(np.sign(mx_ww[n2:])) + if sgn_w == n - n2: + idx = np.where(mx_ww[0:n2] < 0) + mx_ww[idx] = 0 + elif sgn_w == n2 - n: + idx = np.where(mx_ww[0:n2] > 0) + mx_ww[idx] = 0 + #idx = np.where(msk_idx > 0.5) + mx_w = np.mean(mx_ww[msk_idx]) # [mx_layer] + mx_h = np.mean(mx_hh[msk_idx]) # [mx_layer] + mxres = np.mean(mx_res0[msk_idx]) # [mx_layer] + return mx_w, mx_h, mxres + + +def fun_get_patch_warping(img, ctr, tmp_sz, win_sz): + # [w,h] + + img = np.float32(img) + if len(img.shape) == 3: + isColor = True + else: + isColor = False + h = img.shape[0] + w = img.shape[1] + + x = np.arange(1, win_sz[0] + 1) - win_sz[0] / 2 + 0.5 + y = np.arange(1, win_sz[1] + 1) - win_sz[1] / 2 + [x, y] = np.meshgrid(x, y) + p3 = tmp_sz[0] / win_sz[0] + # print p3,p3*tmp_sz[1]/win_sz[1] + yp = ctr[1] + y * (p3 * tmp_sz[1] / win_sz[1]) - 1 + xp = ctr[0] + x * p3 - 1 + + # save_mat_file('warping.mat',x,y,xp,yp) #?? 
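+    # The block below does plain bilinear interpolation of the warped grid
+    # (xp, yp): each output pixel mixes its four integer neighbours
+    # (x0/x1, y0/y1) weighted by the fractional offsets rx0/rx1/ry, and the
+    # *_bool masks zero out neighbours that fall outside the image.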
+ ## + x0 = np.int32(xp) + x1 = x0 + 1 + y0 = np.int32(yp) + y1 = y0 + 1 + + rx0 = xp - x0 + rx1 = 1 - rx0 + ry = yp - y0 + + # -- + + x0_bool = (x0 < 0) + (x0 > w - 1) + x1_bool = (x1 < 0) + (x1 > w - 1) + y0_bool = (y0 < 0) + (y0 > h - 1) + y1_bool = (y1 < 0) + (y1 > h - 1) + + x0[x0_bool] = 0 + x1[x1_bool] = 0 + y0[y0_bool] = 0 + y1[y1_bool] = 0 + + if isColor == True: + patch = np.zeros((win_sz[1], win_sz[0], 3)) + for ii in range(3): + patch[:, :, ii] = (rx1 * img[y0, x0, ii] * (~(y0_bool + x0_bool)) + rx0 * img[y0, x1, ii] * (~(y0_bool + x1_bool))) * (1 - ry) + \ + (rx1 * img[y1, x0, ii] * (~(y1_bool + x0_bool)) + rx0 * img[y1, x1, ii] * (~(y1_bool + x1_bool))) * ry + else: + patch = (rx1 * img[y0, x0] * (~(y0_bool + x0_bool)) + rx0 * img[y0, x1] * (~(y0_bool + x1_bool))) * (1 - ry) + \ + (rx1 * img[y1, x0] * (~(y1_bool + x0_bool)) + rx0 * img[y1, x1] * (~(y1_bool + x1_bool))) * ry + + patch[patch < 0] = 0 + patch[patch > 255] = 255 + return np.uint8(patch + 0.5) + + +def fun_process_binary_image(x): + [n, c, h, w] = x.shape + selem = disk(3) + y = np.zeros((n, c, h, w), x.dtype) + for ii in range(n): + for jj in range(c): + im = closing(np.asarray(x[ii, jj], dtype=np.uint8), selem) + + im = ndimage.binary_erosion(im, structure=np.ones((3, 3))).astype(x.dtype) + im = np.array(im, dtype=np.uint8) + ret, thresh = cv2.threshold(im, 0.5, 1, 0) + contours, hierarchy = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) + cnt = contours[0] + x0, y0, w0, h0 = cv2.boundingRect(cnt) + y[ii, jj, y0:y0 + h0, x0:x0 + w0] = 1 + + #x = ndimage.binary_fill_holes(x).astype(x.dtype) + #x = ndimage.binary_erosion(x, structure=np.ones((4,4)),iterations=2).astype(x.dtype) + flag = True + if np.sum(y) < 1: + flag = False + return y, flag + + +def compute_kernel_size(target_sz, cell_size, factor=1.): + kl = np.int32(factor * target_sz / cell_size + 0.5) + if kl[0] % 2 == 0: + kl[0] = kl[0] - 1 + if kl[1] % 2 == 0: + kl[1] = kl[1] - 1 + return kl + + +def get_weights_bmap(batch_bmap_y): + # batch_bmap_y: [n,nclass,h,w] + [n, nc, h, w] = batch_bmap_y.shape + x = np.sum(batch_bmap_y, axis=(-2, -1), keepdims=True) + x = np.asarray(x, dtype=np.float32) + x = 1 / x + x = nc * x / np.sum(x, axis=1) + y = np.zeros((n, nc, h, w), dtype=np.float32) + for ii in xrange(n): + for jj in xrange(nc): + y[ii, jj, :, :] = x[ii, jj, 0, 0] + return y + + +def trans_bmap_label01(bmap): + # bmap: [n,1,h,w] + # out: [n*h*w,2] + out = np.reshape(bmap, (-1, 1)) + out = np.concatenate((1 - bmap, bmap), axis=-1) + #out = np.concatenate((bmap,1-bmap),axis=1) + #out = np.swapaxes(bmap,) + return np.asarray(out, dtype=np.int32) + + +def fun_is_detect(his_res, cur_res): + # his_res: [n_historical_sampe+1,ntracker] + # cur_res: [ntracker] + mn_res = np.mean(his_res[0:-1, :], axis=0) + factor = 0.8 + if factor * np.max(mn_res) > np.max(cur_res): + flag_detect = True + else: + flag_detect = False + #idx = np.mod(iframe,his_res.shape[0]-1) + return flag_detect + + +def fun_get_search_scale(): + s1 = [1, 0.99, 1.01, 0.995, 0.985, 1.005, 1.015] # [1]#,0.995,0.99,1.005,1.01]# + # s1 = [1] + s2 = [1] # [1,0.99,0.995,1.01,1.005] + n1 = len(s1) + n2 = len(s2) + search_scale = np.zeros((n1 * n2, 2), dtype=np.float32) + count = 0 + for x in s1: + for y in s2: + search_scale[count, 0] = x + search_scale[count, 1] = x * y + count = count + 1 + assert(search_scale[0, 0] == 1 and search_scale[0, 1] == 1) + return search_scale + + +def fun_border_modification(ctr_rct, img_h, img_w): + # [w,h,wlen,hlen] + wlen = ctr_rct[2] + hlen = 
ctr_rct[3] + if ctr_rct[0] - wlen / 2 < 0: + ctr_rct[0] = wlen / 2 + 0.5 + if ctr_rct[0] + wlen / 2 > img_w: + ctr_rct[0] = img_w - wlen / 2 - 0.5 + if ctr_rct[1] - hlen / 2 < 0: + ctr_rct[1] = hlen / 2 + 0.5 + if ctr_rct[1] + hlen / 2 > img_h: + ctr_rct[1] = img_h - hlen / 2 - 0.5 + return ctr_rct + + +def fun_get_search_ctr(rct, factor=1.): + # [w,h] + offset_search = np.zeros((1, 2), dtype=np.float32) + wlist = (0,) # -rct[0]*factor,rct[0]*factor) + hlist = (0,) # -rct[1]*factor,rct[1]*factor) + count = 0 + for w in wlist: + for h in hlist: + offset_search[count, :] = [w, h] + count = count + 1 + return offset_search + + +def find_stable_tracker(response, maxres, mx_h, mx_w, loss, update_idx): + # weight_trackers: [ntracker] + # response: [ntracker,h,w] + # maxres: [ntracker] + # maxres_hw: [ntracker,2] + # R: [ntracker] + # loss: [nsample+1,ntracker] + + ntracker = response.shape[0] + nsample = loss.shape[0] - 1 + eps = 1.0e-4 + + for ii in xrange(ntracker): + loss[-1, ii] = maxres[ii] - response[ii, mx_h, mx_w] + + loss_mean = np.mean(loss[0:nsample, :], axis=0) # [ntracker] + loss_std = np.std(loss[0:nsample, :], axis=0) # [ntracker] + loss_mean[loss_mean < 0.0001] = 0 + loss_std[loss_std < 0.0001] = 0 + + # compute alpha + score_tracker = np.absolute(loss[-1, :] - loss_mean) / (loss_std + eps) + + #loss_idx = np.mod(iframe,nsample) + loss[update_idx, :] = loss[-1, :] + + return score_tracker + + +def update_weights(iframe, weight_trackers, response, maxres, maxres_hw, mx_scale, mx_h, mx_w, R, loss): + # weight_trackers: [ntracker] + # response: [ntracker,h,w] + # maxres: [ntracker] + # maxres_hw: [ntracker,2] + # R: [ntracker] + # loss: [nsample+1,ntracker] + + ntracker = response.shape[0] + nsample = loss.shape[0] - 1 + eps = 1.0e-6 + + for ii in xrange(ntracker): + loss[-1, ii] = maxres[ii] - response[ii, mx_h, mx_w] + + loss_mean = np.mean(loss[0:nsample, :], axis=0) # [ntracker] + loss_std = np.std(loss[0:nsample, :], axis=0) # [ntracker] + loss_mean[loss_mean < 0.0001] = 0 + loss_std[loss_std < 0.0001] = 0 + + # compute alpha + curDiff = np.absolute(loss[-1, :] - loss_mean) / (loss_std + eps) + + ## + loss_idx = np.mod(iframe, nsample) + loss[loss_idx, :] = loss[-1, :] + min_idx = np.argmin(curDiff) + W = np.zeros(ntracker, dtype=np.float32) + W[min_idx] = 1 + return loss, W + + ''' + print curDiff + alpha = 0.97*np.exp(-10*curDiff) + alpha[alpha>0.9] = 0.97 + alpha[alpha<0.12] = 0.119 + + ## compute R + lossA = np.sum(weight_trackers*loss[-1,:]) # \bar{l}_l^k + R = R*alpha + (1-alpha)*(lossA-loss[-1,:]) + + print alpha + print R + ## update loss + loss_idx = np.mod(iframe,nsample) + loss[loss_idx,:] = loss[-1,:] + + ## update weights + A = 0.#0.011 + W = np.zeros(ntracker,dtype=np.float32) + mn = np.mean(R) + for ii in xrange(ntracker): + x = R[ii] - mn + if x<=0: + W[ii] = 0 + else: + W[ii] = x/mx_scale*np.exp((x*x/(2*mx_scale))) + W = W/np.sum(W) + ''' + + # return loss,R,W + + +def get_maxdirection(x): + n = len(x) + m = np.sum(x < 0) + r = 1. * m / n + if r > 0.5: + y = np.mean(x[x < 0]) + else: + y = np.mean(x[x >= 0]) + return y + + +def get_maxdirection2(maxres_hw): + n = maxres_hw.shape[0] + m = np.sum(maxres_hw < 0, axis=0) + flag = np.ones((1, 2), dtype=np.float32) + if 1. * m[0] / n > 0.5: + flag[0, 0] = -1 + if 1. 
* m[1] / n > 0.5: + flag[0, 1] = -1 + u = np.prod(maxres_hw * flag, axis=-1) + return np.where(u >= 0)[0] + + +def get_max_ps(response, ctr_h, ctr_w): + # [h,w] + # output: choose from [h,w] + [h, w] = response.shape + mxv_idx = np.argmax(response) + maxres_hw = np.unravel_index(mxv_idx, response.shape) + maxres = response[maxres_hw[0], maxres_hw[1]] + + mx_h = maxres_hw[0] - ctr_h + mx_w = maxres_hw[1] - ctr_w + return maxres, mx_h, mx_w + + +def get_ps_offset(response, ctr_h, ctr_w, stable_scores): + # [ntracker,h,w] + # output: choose from [h,w] + [ntracker, h, w] = response.shape + maxres = np.zeros((ntracker), dtype=np.float32) + maxres_hw = np.zeros((ntracker, 2), dtype=np.int32) + #mx_h = 0. + #mx_w = 0. + for kk in xrange(ntracker): + mxv_idx = np.argmax(response[kk]) + maxres_hw[kk] = np.unravel_index(mxv_idx, response[kk].shape) + maxres[kk] = response[kk, maxres_hw[kk, 0], maxres_hw[kk, 1]] + #mx_h = mx_h + maxres_hw[kk,0]*weight_trackers[kk] + #mx_w = mx_w + maxres_hw[kk,1]*weight_trackers[kk] + + maxres_hw[:, 0] = maxres_hw[:, 0] - ctr_h + maxres_hw[:, 1] = maxres_hw[:, 1] - ctr_w + + ''' + ## 1 + mx_h = get_maxdirection(maxres_hw[0:-1,0]) + mx_w = get_maxdirection(maxres_hw[0:-1,1]) + + mx_h = 0.5*mx_h + 0.5*maxres_hw[-1,0] + mx_w = 0.5*mx_w + 0.5*maxres_hw[-1,1] + + ''' + # 2 + ''' + idx0 = get_maxdirection2(maxres_hw[0:-1,:]) + idx = np.argmax(maxres[idx0]) + idx = idx0[idx] + mx_h = maxres_hw[idx,0] + mx_w = maxres_hw[idx,1] + mx_h = 0.5*mx_h + 0.5*maxres_hw[-1,0] + mx_w = 0.5*mx_w + 0.5*maxres_hw[-1,1] + ''' + ''' + ## 3 + mx_h = get_maxdirection(maxres_hw[0:-1,0]) + mx_w = get_maxdirection(maxres_hw[0:-1,1]) + + mx_h = 0.5*mx_h + 0.5*maxres_hw[-1,0] + mx_w = 0.5*mx_w + 0.5*maxres_hw[-1,1] + idx = 0 + ''' + # 5 + ''' + idx = np.argmin(stable_scores) + mx_h = 0.5*maxres_hw[idx,0] + 0.5*maxres_hw[-1,0] + mx_w = 0.5*maxres_hw[idx,1] + 0.5*maxres_hw[-1,1] + ''' + + # 6/7 + ''' + idx0 = get_maxdirection2(maxres_hw[0:-1,:]) + idx = np.argmin(stable_scores[idx0]) + idx = idx0[idx] + if maxres[idx]>=maxres[-1]: + mx_h = maxres_hw[idx,0] + 0.5*maxres_hw[-1,0] + mx_w = maxres_hw[idx,1] + 0.5*maxres_hw[-1,1] + else: + idx = -1 + mx_h = maxres_hw[-1,0] + mx_w = maxres_hw[-1,1] + ''' + # 8 + idx0 = get_maxdirection2(maxres_hw[0:-1, :]) + idx = np.argmin(stable_scores[idx0]) + idx = idx0[idx] + if maxres[idx] >= maxres[-1] and stable_scores[idx] < stable_scores[-1]: # small is better + mx_h = maxres_hw[idx, 0] + mx_w = maxres_hw[idx, 1] + else: + idx = -1 + mx_h = maxres_hw[-1, 0] + mx_w = maxres_hw[-1, 1] + # 9 + ''' + idx = np.argmax(maxres[0:-1]) + if maxres[idx]>=maxres[-1] and stable_scores[idx] 0: + strnn_list = [] + for x in srnn_directions: + assert(x == 'topleft' or x == 'bottomright' or x == 'topright' or x == + 'bottomleft' or x == 'previous' or x == 'current') + strnn_list.append((x, 'current')) + if x == 'previous': + trnn_flag = True + else: + strnn_list = [('current',)] + return strnn_list, trnn_flag + + +def fun_get_patch(x, ctr, sz): + # [w,h] + h = x.shape[0] + w = x.shape[1] + hidx = np.int32(np.floor(ctr[1] - sz[1] / 2 + 0.5) + np.arange(sz[1])) + widx = np.int32(np.floor(ctr[0] - sz[0] / 2 + 0.5) + np.arange(sz[0])) + hidx[hidx < 0] = 0 + hidx[hidx > h - 1] = h - 1 + widx[widx < 0] = 0 + widx[widx > w - 1] = w - 1 + # print hidx, widx + [hidx, widx] = np.meshgrid(hidx, widx) + hidx = hidx.transpose() + widx = widx.transpose() + # print hidx, widx + return np.asarray(x[hidx, widx, :], dtype=np.float32) + + +def fun_response(model_w, xf, zf, kernel_type, sigma, 
gamma): + # xf: nx*(nd*c)*h*w, frequency domain + # zf: nz*(nd*c)*h*w, frequency domain + # model_w: (2*nx)*1*h*w, real domain + # output: nz*h*w, real_domain + h = xf.shape[-2] + w = xf.shape[-1] + nz = zf.shape[0] + nx = xf.shape[0] + if nx == 1: + assert(nz == 1) + assert(model_w.shape[0] == h and model_w.shape[1] == w) + k_zf_xf = fun_correlation_singlesample(zf[0], xf[0], kernel_type, sigma) # h*w + return np.real(np.fft.ifft2(k_zf_xf * model_w)) + else: + kernel_zf_xf = fun_kernel_multisample(xf, zf, kernel_type, sigma) # (2*nz)*(2*nx)*h*w + response = np.zeros((2 * nz, h, w)) + for ii in xrange(h): + for jj in xrange(w): + response[:, ii, jj] = (np.dot(kernel_zf_xf[:, :, ii, jj], model_w[:, :, ii, jj])).flatten() + return np.real(np.fft.ifft2(response[0:nz, :, :] + 1.0j * response[nz::, :, :], axes=(-2, -1))) + + +def fun_w(xf, yf, kernel_type, sigma, gamma): + # xf: n*(nd*c)*h*w, frequency domain + # yf: n*1*h*w frequency domain + # output: w >> (2*n)*1*h*w, =(I-(gamma*I+A'A)^-1*A'A)*y + assert(xf.shape[2:] == yf.shape[2:] and xf.shape[0] == yf.shape[0]) + n = xf.shape[0] + h = xf.shape[-2] + w = xf.shape[-1] + if n == 1: + k_xf = fun_correlation_singlesample(xf[0], xf[0], kernel_type, sigma) + gamma # h*w + pjt = np.divide(yf[0, 0], k_xf) + else: + AtA = fun_kernel_multisample(xf, xf, kernel_type, sigma) + pjt = np.zeros((2 * n, 1, h, w), dtype=np.float32) + gammaI = gamma * np.eye(2 * n, dtype=np.float32) + + y = np.concatenate((np.real(yf), np.imag(yf)), axis=0) + for ii in xrange(h): + for jj in xrange(w): + pjt[:, :, ii, jj] = y[:, :, ii, jj] - \ + np.dot(np.dot(np.linalg.inv(AtA[:, :, ii, jj] + gammaI), AtA[:, :, ii, jj]), y[:, :, ii, jj]) + return pjt + + +def fun_kernel_multisample(xf, yf, kernel_type, sigma): + # xf: nx*(nd*c)*h*w, frequency domain + # yf: ny*(nd*c)*h*w or 1*(nd*c)*h*w, frequency domain + # out_kernel: (2*ny)*(2*nx)*h*w, for each position yf'*xf + assert(xf.shape[1:] == yf.shape[1:]) + h = xf.shape[-2] + w = xf.shape[-1] + nx = xf.shape[0] + ny = yf.shape[0] + out_kernel = np.zeros((ny * 2, nx * 2, h, w), dtype=np.float32) + + if kernel_type == 'gaussian': + for ii in xrange(h): + for jj in xrange(w): + xx = xf[:, :, ii, jj] + yy = yf[:, :, ii, jj] + d_yr_xr = cdist(np.real(yy), np.real(xx), 'sqeuclidean') + d_yr_xi = cdist(np.real(yy), np.imag(xx), 'sqeuclidean') + d_yi_xr = cdist(np.imag(yy), np.real(xx), 'sqeuclidean') + d_yi_xi = cdist(np.imag(yy), np.imag(xx), 'sqeuclidean') + + out_kernel[0:ny, 0:nx, ii, jj] = d_yr_xr + d_yi_xi + out_kernel[0:ny, nx::, ii, jj] = d_yr_xi - d_yi_xr + out_kernel[ny::, 0:nx, ii, jj] = d_yi_xr - d_yr_xi + out_kernel[ny::, nx::, ii, jj] = d_yr_xr + d_yi_xi + out_kernel = np.exp(-out_kernel / (sigma * sigma * h * w)) # ?? + elif kernel_type == 'linear': + for ii in xrange(h): + for jj in xrange(w): + xx = xf[:, :, ii, jj] + yy = yf[:, :, ii, jj] + d_yr_xr = np.dot(np.real(yy), (np.real(xx)).transpose()) + d_yr_xi = np.dot(np.real(yy), (np.imag(xx)).transpose()) + d_yi_xr = np.dot(np.imag(yy), (np.real(xx)).transpose()) + d_yi_xi = np.dot(np.imag(yy), (np.imag(xx)).transpose()) + + out_kernel[0:ny, 0:nx, ii, jj] = d_yr_xr + d_yi_xi + out_kernel[0:ny, nx::, ii, jj] = d_yr_xi - d_yi_xr + out_kernel[ny::, 0:nx, ii, jj] = d_yi_xr - d_yr_xi + out_kernel[ny::, nx::, ii, jj] = d_yr_xr + d_yi_xi + out_kernel = out_kernel / (h * w) # ??? 
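+        # dividing by h*w undoes the scaling of the unnormalized np.fft
+        # transforms (Parseval), keeping the kernel on the spatial
+        # inner-product scale; this mirrors the /M normalization in
+        # fun_correlation_singlesample below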
+ else: + assert(kernel_type == 'gaussian' or kernel_type == 'linear') + return out_kernel + + +def fun_correlation_singlesample(xf, yf, kernel_type, sigma): + # xf: c*h*w, frequency domain + # yf: c*h*w, frequency domain + # sigma: gaussian param + # kernel_type: 'linear', or 'gaussian' + # output: h*w + # Note: yf.conjugate() + assert(xf.shape == yf.shape) + N = xf.shape[1] * xf.shape[2] + M = N * xf.shape[0] + if kernel_type == 'linear': + kf = np.sum(xf * yf.conjugate(), axis=0) / M + elif kernel_type == 'gaussian': + + N = xf.shape[-2] * xf.shape[-1] + xx = np.real((xf.conjugate() * xf).sum()) / N # n*nd*c*1*1 + yf_conj = yf.conjugate() + yy = np.real((yf * yf_conj).sum()) / N + + xyf = xf * yf_conj + xy = np.sum(np.real(np.fft.ifft2(xyf, axes=(-2, -1))), axis=0) + + uu = (xx + yy - 2 * xy) / M + uu = uu * (uu > 0) + kf = np.fft.fft2(np.exp((-1 / (sigma * sigma)) * uu), axes=(-2, -1)) + else: + assert(kernel_type == 'gaussian' or kernel_type == 'linear') + return kf + + +def fun_precision_location(pre_ctr_rcts, gt_ctr_rcts, thre=50, step=1): + # pre_ctr_rcts, gt_ctr_rcts: ctr + n = pre_ctr_rcts.shape[0] + # ctr localization + diff_ps = np.sqrt(np.sum(np.square(pre_ctr_rcts[:, 0:2] - gt_ctr_rcts[:, 0:2]), axis=-1)) + x = np.arange(0, thre + step / 2, thre) + m = len(x) + ratio_ps = np.zeros((m, 2), dtype=np.float32) + ratio_ps[:, 0] = x + for ii in range(m): + pc = x[ii] + ratio_ps[ii, 1] = np.sum(diff_ps <= pc) + return np.mean(diff_ps), ratio_ps, diff_ps + + +def fun_precision_overlap(pre_ctr_rcts, gt_ctr_rcts, step=0.05): + # overlap + assert(pre_ctr_rcts.shape == gt_ctr_rcts.shape) + n = pre_ctr_rcts.shape[0] + # from ctr to box + pbbx = fun_ctr2rct(pre_ctr_rcts) + pbbx[:, 2] = pbbx[:, 0] + pbbx[:, 2] + pbbx[:, 3] = pbbx[:, 1] + pbbx[:, 3] + gbbx = fun_ctr2rct(gt_ctr_rcts) + gbbx[:, 2] = gbbx[:, 0] + gbbx[:, 2] + gbbx[:, 3] = gbbx[:, 1] + gbbx[:, 3] + + lt = np.dstack((pbbx[:, 0:2], gbbx[:, 0:2])) + br = np.dstack((pbbx[:, 2:4], gbbx[:, 2:4])) + ## + whs_min = np.min(lt, axis=-1) + whs_max = np.max(lt, axis=-1) + whe_min = np.min(br, axis=-1) + whe_max = np.max(br, axis=-1) + + # intersection + inner = whe_min - whs_max + inner[inner < 0] = 0 + inner = np.prod(inner, axis=-1) + + # union + outer = whe_max - whs_min + outer[outer < 0] = 0 + outer = np.prod(outer, axis=-1) + outer[outer <= 0] = 1 + + diff = np.divide(inner, outer) + x = np.arange(0, 1 + step / 2, step) + m = len(x) + ratio = np.zeros((m, 2), dtype=np.float32) + ratio[:, 0] = x + for ii in range(m): + k = np.sum(diff > x[ii]) + ratio[ii, 1] = 1. 
* k / n + return np.mean(diff), ratio, diff + + +def fun_shift_feas(x, px, directions, step, outx): + # x,px: n*c*(h+2*step)*(w+2*step) + # outx: n*(ndirections*c)*h*w + xs = x.shape + os = outx.shape + assert(xs[0] == os[0] and xs[2] == os[2] + 2 * step and xs[3] == os[3] + 2 * step) + assert(xs[1] * len(directions) == os[1]) + c = xs[1] + h = os[2] + w = os[3] + kk = 0 + for dct in directions: + if dct == 'topleft': + outx[:, kk:kk + c, :, :] = x[:, :, 0:h, 0:w] + elif dct == 'topright': + outx[:, kk:kk + c, :, :] = x[:, :, 0:h, 2 * step::] + elif dct == 'bottomleft': + outx[:, kk:kk + c, :, :] = x[:, :, 2 * step::, 0:w] + elif dct == 'bottomright': + outx[:, kk:kk + c, :, :] = x[:, :, 2 * step::, 2 * step::] + elif dct == 'current': + outx[:, kk:kk + c, :, :] = x[:, :, step:-step, step:-step] + elif dct == 'previous': + outx[:, kk:kk + c, :, :] = px[:, :, step:-step, step:-step] + else: + assert(dct == 'topleft' or dct == 'topright' or dct == 'bottomleft' or dct == + 'bottomright' or dct == 'current' or dct == 'previous') + kk = kk + c + return + + +def fun_get_freq_fea(x, is_cos_window): + # x: n*directions*c*h*w + h = x.shape[-2] + w = x.shape[-1] + if is_cos_window: + shp = np.ones(len(x.shape)) + shp[-2] = h + shp[-1] = w + x = x * np.reshape(fun_cos_win(h, w), shp) + xf = np.fft.fft2(x, axes=(-2, -1)) + return xf + + +def fun_cos_win(h, w, shp): + x = np.arange(h) + x = x.reshape((h, 1)) + x = 0.5 * (1 - np.cos((2 * np.pi / (h - 1)) * x)) + y = np.arange(w) + y = y.reshape((1, w)) + y = 0.5 * (1 - np.cos((2 * np.pi / (w - 1)) * y)) + xy = np.dot(x, y) + xy = np.asarray(xy, dtype=np.float32) + # save_mat_file('1.mat',xy,None,None,None) + # assert(3==1) + #xy = xy.reshape((h,w,1)) + return np.reshape(xy, shp) + + +def fun_draw_rct_on_image(im, fname, rct1, rct2, rct3): + if len(im.shape) == 2: + im = np.dstack((im, im, im)) + im = Image.fromarray(im) + dr = ImageDraw.Draw(im) + if rct1 is not None: + ws = rct1[0] - np.floor(rct1[2] / 2) + hs = rct1[1] - np.floor(rct1[3] / 2) + dr.rectangle(((ws, hs), (ws + rct1[2], hs + rct1[3])), outline="blue") + if rct2 is not None: + ws = rct2[0] - np.floor(rct2[2] / 2) + hs = rct2[1] - np.floor(rct2[3] / 2) + dr.rectangle(((ws, hs), (ws + rct2[2], hs + rct2[3])), outline="green") + if rct3 is not None: + ws = rct3[0] - np.floor(rct3[2] / 2) + hs = rct3[1] - np.floor(rct3[3] / 2) + dr.rectangle(((ws, hs), (ws + rct3[2], hs + rct3[3])), outline="red") + del dr + im.save(fname) + + +def fun_draw_mask_on_image(im, fname, bmap, rct_ctr, win_sz): + h = im.shape[0] + w = im.shape[1] + if len(im.shape) == 2: + im = np.dstack((im, im, im)) + + sz = np.int32(win_sz) + hs = np.max((0, rct_ctr[1] - sz[1] / 2 + 0.5)) + he = np.min((h - 1, hs + sz[1])) + + ws = np.max((0, rct_ctr[0] - sz[0] / 2 + 0.5)) + we = np.min((w - 1, ws + sz[0])) + + hs = np.uint32(hs) + he = np.uint32(he) + ws = np.uint32(ws) + we = np.uint32(we) + #sz = np.uint32(win_sz) + x = MISC.imresize(np.uint8(bmap) * 255, np.uint32([he - hs, we - ws]), interp='bicubic') + y = im[hs:he, ws:we, :] + hidx, widx = np.where(x > 128) + # print len(hidx),len(widx),hs,he,ws,we + y[hidx, widx, :] = 255 + + im[hs:he, ws:we, :] = y + + im = Image.fromarray(im) + im.save(fname) + + +def fun_get_max_response(x1, x2, x3, x4): + max_idx = np.argmax(x1) + [c1, h1, w1] = np.unravel_index(max_idx, x1.shape) + max_v1 = x1[c1, h1, w1] + + max_idx = np.argmax(x2) + [c2, h2, w2] = np.unravel_index(max_idx, x2.shape) + max_v2 = x2[c2, h2, w2] + + max_idx = np.argmax(x3) + [c3, h3, w3] = np.unravel_index(max_idx, 
x3.shape) + max_v3 = x3[c3, h3, w3] + + max_idx = np.argmax(x4) + [c4, h4, w4] = np.unravel_index(max_idx, x4.shape) + max_v4 = x4[c4, h4, w4] + + v = (max_v1, max_v2, max_v3, max_v4) + max_idx = np.argmax(v) + max_v = v[max_idx] + idx = ((c1, h1, w1), (c2, h2, w2), (c3, h3, w3), (c4, h4, w4)) + return max_v, idx[max_idx] + + +def fun_binarize_map(prop): + prop[prop >= 0.5] = 1 + prop[prop <= 0.5] = 0 + return prop + + +def fun_get_binary_map(win_size, rct_size, cell_size): + #win_ctr = np.floor(win_size/2) + + win_size2 = np.floor(win_size / cell_size + 0.5) + rct_size2 = np.floor(rct_size / cell_size) + win_ctr2 = np.int32(np.floor(win_size2 / 2 + 0.5)) - 1 + + h = win_size2[1] + w = win_size2[0] + ctrh = win_ctr2[1] + ctrw = win_ctr2[0] + + m = np.zeros((h, w), dtype=np.float32) + + hs = ctrh - np.floor(rct_size2[1] / 2) + he = hs + rct_size2[1] + ws = ctrw - np.floor(rct_size2[0] / 2) + we = ws + rct_size2[0] + + m[hs:he, ws:we] = 1.0 + return m + + +def fun_get_peak_map(win_size, rct_size, cell_size, fea_sz, isroll): + # input >> [width,height] + # output >> matrix of [h,w] + + sigma_factor = 0.1 + sigma = np.sqrt(rct_size[1] * rct_size[0]) * sigma_factor / cell_size + + #win_size2 = np.floor(win_size/cell_size+0.5) + win_size2 = fea_sz + [rs, cs] = np.meshgrid(np.arange(1, win_size2[0] + 1) - np.floor(win_size2[0] / 2 + 0.5), + np.arange(1, win_size2[1] + 1) - np.floor(win_size2[1] / 2 + 0.5)) + m = np.exp(-0.5 / (sigma * sigma) * (rs * rs + cs * cs)) + m = m.astype(np.float32) + + if isroll: + m = np.roll(m, -np.int32(np.floor(win_size2[1] / 2 + 0.5)) + 1, axis=0) + m = np.roll(m, -np.int32(np.floor(win_size2[0] / 2 + 0.5)) + 1, axis=1) + ctr_h = 0 + ctr_w = 0 + assert m[0, 0] == 1 + else: + ctr_h = np.int32(np.floor(win_size2[1] / 2 + 0.5)) - 1 + ctr_w = np.int32(np.floor(win_size2[0] / 2 + 0.5)) - 1 + assert m[ctr_h, ctr_w] == 1 + return m, ctr_h, ctr_w + + +def fun_get_search_window2(target_sz, im_sz, magh=None, magw=None): + ## [width, height] + # if padding_type==None: use all other three vars + # else use target_sz, padding + # ''' + # pdb.set_trace() + # if magh is None or magw is None: + # if target_sz[1]/im_sz[1] > 0.3: + # if target_sz[1]/target_sz[0] <1.5: + # magh = 2 + # magw = 2 + # else: + # magh = 1.4 + # magw = 2 + # elif np.max(target_sz) < 30: + # # magh = 3 #3.2 + # # magw = 3 #3.2 + # magh = 3.2 + # magw = 3.2 + # elif target_sz[1]/target_sz[0]>2 or np.max(target_sz) > 60: + # # magh = 2 #1.4 + # magh = 1.4 + # magw = 2 + # else: + # magh = 2.4 + # magw = 2.4 + # # # ''' + + if magh is None or magw is None: + if target_sz[1] / target_sz[0] > 2: + magh = 1.4 + magw = 3.2 + elif np.prod(target_sz) / np.prod(im_sz) > 0.05: + magh = 2 + magw = 2 + else: + magh = 3.2 + magw = 3.2 + + # print target_sz, magw, magh + # sz = target_sz * np.array([magw, magh]) + # mxsz = np.max(sz) + # magh = mxsz / target_sz[1] + # magw = mxsz / target_sz[0] + + window_sz = np.floor(target_sz * np.array([magw, magh])) + return window_sz, magh, magw + + +def fun_resize_samples(X, rcts, padding): + if np.sqrt(gt_rcts[0, 2] * gt_rcts[0, 3]) > 80: + n = X.shape[0] + oh = X.shape[1] + ow = X.shape[2] + if len(X.shape) == 4: + isColor = True + else: + isColor = False + + sc = 80.0 / np.amax(gt_rcts[0, 2:]) + scale_factor = np.asarray([sc, sc], dtype=np.float32) + h = np.int16(np.floor(oh * scale_factor[1])) # +0.5 + w = np.int16(np.floor(ow * scale_factor[0])) # +0.5 + if isColor == True: + Z = np.zeros((n, h, w, 3), dtype=X.dtype) + for ii in range(n): + Z[ii, :, :, :] = MISC.imresize(X[ii, 
:, :, :], [h, w], interp='bicubic') + else: + Z = np.zeros((n, h, w), dtype=X.dtype) + for ii in range(n): + Z[ii, :, :] = MISC.imresize(X[ii, :, :], [h, w], interp='bicubic') + + rcts = np.zeros((n, 4), dtype=np.float32) + rcts[:, 0] = gt_rcts[:, 0] * scale_factor[0] + rcts[:, 1] = gt_rcts[:, 1] * scale_factor[1] + rcts[:, 2] = gt_rcts[:, 2] * scale_factor[0] + rcts[:, 3] = gt_rcts[:, 3] * scale_factor[1] + rcts = np.floor(rcts) # floor?? + + else: + rcts = np.copy(gt_rcts) + Z = np.copy(X) + scale_factor = np.asarray([1, 1], dtype=np.float32) + + nm_rct_size = np.copy(rcts[0, 2:]) + nm_win_size = np.floor(nm_rct_size * (1 + padding)) + + return Z, scale_factor, nm_rct_size, nm_win_size, rcts diff --git a/tracking/tf_utis.py b/tracking/tf_utis.py new file mode 100644 index 0000000..52b906a --- /dev/null +++ b/tracking/tf_utis.py @@ -0,0 +1,70 @@ +import os +import tensorflow as tf +import numpy as np +from configer import * + +# fea_sz = [57,57] +# kernel_sigma = 0.5 +# kernel_type = 'linear' +# kernel_gamma = 1.0 # 1.e-6 +# pca_energy = 100 +# nn_p = 6 # +nscale = search_scale.shape[0] +M = pca_energy*fea_sz[0]*fea_sz[1] + +class Response: + def __init__(self): + self.vgg_fea_pca = tf.placeholder(tf.float64,[7,nn_p,fea_sz[0]*fea_sz[1],pca_energy]) #7*6*3249*100 + self.model_alphaf = tf.placeholder(tf.complex64,[nn_p,fea_sz[0],fea_sz[1]]) #6*57*57 + self.model_xf = tf.placeholder(tf.complex64,[nn_p,1,pca_energy,fea_sz[0],fea_sz[1]]) #6*1*100*57*57 + + with tf.name_scope("get_response"): + self.build(self.vgg_fea_pca,self.model_alphaf,self.model_xf) + + def build(self,vgg_fea_pca,model_alphaf,model_xf): + + vgg_fea_pca = tf.transpose(vgg_fea_pca,[2,0,1,3]) #3249*7*6*100 + vgg_fea_pca = tf.reshape(vgg_fea_pca,[fea_sz[0],fea_sz[1],7,nn_p,pca_energy]) #57*57*7*6*100 + vgg_fea_pca = tf.transpose(vgg_fea_pca,perm=[2,3,4,0,1]) #7*6*100*57*57 + vgg_fea_pca = tf.cast(vgg_fea_pca,dtype=tf.complex64) + model_xf = tf.transpose(model_xf,perm=[1,0,2,3,4]) #1*6*100*57*57 + + zf = tf.fft2d(vgg_fea_pca) #7*6*100*57*57 + k_zf_xf = tf.reduce_sum(tf.multiply(zf,tf.conj(model_xf)),axis=2)/M #7*6*57*57 + + response = tf.real(tf.ifft2d(k_zf_xf * model_alphaf)) + self.response = tf.expand_dims(response,axis=0) + + +class PCA: + def __init__(self): + self.is_mean = tf.placeholder(tf.bool) + self.x_mean = tf.placeholder(tf.float32,[nn_p,1,512]) #6*1*512 + self.is_norm = tf.placeholder(tf.bool) + self.x_norm = tf.placeholder(tf.float32,[nn_p]) #6 + self.w = tf.placeholder(tf.float32,[nn_p,512,pca_energy]) #6*512*100 + + self.vgg_fea = tf.placeholder(tf.float32,[nscale,fea_sz[0],fea_sz[1],512*nn_p]) #7*57*57*3072 + + + with tf.name_scope("Pca_te"): + self.build(self.is_mean, self.x_mean, self.is_norm, self.x_norm, self.w, self.vgg_fea) + + def build(self, is_mean, x_mean, is_norm, x_norm, w, vgg_fea): + + x_mean = tf.tile(tf.expand_dims(x_mean,axis=0),multiples=[nscale,1,1,1]) #7*6*1*512 + self.tmp3 = x_mean + x_norm = tf.expand_dims(x_norm,axis=0) + x_norm = tf.expand_dims(x_norm,axis=0) + x_norm = tf.expand_dims(x_norm,axis=0) + x_norm = tf.transpose(x_norm,perm=[0,3,1,2]) #1*6*1*1 + + w = tf.tile(tf.expand_dims(w,axis=0),multiples=[nscale,1,1,1]) #7*6*512*100 + vgg_fea = tf.transpose(vgg_fea,perm=[0,3,1,2]) #7*3072*57*57 + vgg_fea = tf.reshape(vgg_fea,[nscale,512*nn_p,fea_sz[0]*fea_sz[1]]) #7*3072*3249 + vgg_fea = tf.transpose(vgg_fea,perm=[1,0,2]) #3072*7*3249 + vgg_fea = tf.transpose(tf.reshape(vgg_fea,[nn_p,512,nscale,fea_sz[0]*fea_sz[1]]),perm=[2,0,3,1]) #7*6*3249*512 + + z = tf.cond(is_mean, lambda: vgg_fea 
- x_mean, lambda: vgg_fea) + z = tf.cond(is_norm, lambda: tf.divide(z,x_norm), lambda: z) + self.vgg_fea_pca = tf.matmul(z,w) #7*6*3249*100 diff --git a/tracking/tracker.py b/tracking/tracker.py new file mode 100644 index 0000000..d38a508 --- /dev/null +++ b/tracking/tracker.py @@ -0,0 +1,289 @@ +#encoding:utf-8 + +import argparse +import tf_utis +from tf_utis import * +from configer import * + +parser = argparse.ArgumentParser() +parser.add_argument('--start','-s',default=0,type=int) +parser.add_argument('--end','-e',default=100,type=int) +parser.add_argument('--gpu','-g',default=0,type=str) +parser.add_argument('--seq','-seq',default=None,type=str) +opt = parser.parse_args() + +os.environ["CUDA_VISIBLE_DEVICES"] = opt.gpu +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' +#################################################### +##### Main Fun ############ +#################################################### +if __name__ == '__main__': + #### + if not os.path.isdir(data_path): + raise Exception('data not exist',data_path) + + #### + fdrs = next(os.walk(data_path))[1] + fdrs = sorted(fdrs) + n_fdr = len(fdrs) + #### build VGG model + with tf.device(gpu_id): + vgg = vgg19_tf.Vgg19( vgg_model_path, vgg_out_layers) + vgg_sess = tf.Session(config = config) + pca_te = tf_utis.PCA() + pca_sess = tf.Session(config = config) + res_map = tf_utis.Response() + res_sess = tf.Session(config = config) + vgg_map_total, vgg_map_idx, vgg_map_nlayer = vgg.out_map_total, vgg.out_map_idx, vgg.out_map_nlayer + + #### + for ifdr in range(opt.start,opt.end):#np.arange (start_sample,end_sample,step_sample):#n_fdr + ## + fdr = fdrs[ifdr] + if opt.seq is not None: + if fdr != opt.seq: + continue + # continue + fpath = os.path.join(data_path,fdr) + f_rcts = glob.glob(fpath+'/groundtruth_rect*.txt') + f_imgs = glob.glob(fpath+'/img/*.jpg') + n_img = len(f_imgs) + n_rct = len(f_rcts) + f_imgs = sorted(f_imgs,key=str.lower) + + print("{}:{}:{}".format(ifdr, fdr, n_img)) + ## read images >> X0 + # n_img = 30 + for ii in range(n_img): + img = read_image(f_imgs[ii],True,True,-1) + if ii == 0: + im_sz = np.asarray([img.shape[1],img.shape[0]]) + X0 = np.zeros((n_img,img.shape[0],img.shape[1],3),dtype=np.uint8) + X0[ii,:,:,:] = img + del img + + ################# each sequence ############################## + for iseq in range(n_rct): + str1 = 'result_%s_%d_%.3f.mat' %(fdr,iseq,update_factor) + fname = os.path.join(cache_path,str1) + if os.path.isfile(fname): + print("{} existed result".format(fname)) + continue + + #### log file + str1 = 'log_%s_%s_%d.txt' %(pstr,fdr,iseq) #pstr=gcnn + log_file = os.path.join(cache_path,str1) + logger = Logger(logname=log_file, loglevel=1, logger=">>").getlog() + + #### load rct and convert it ctr style + gt_rcts0 = np.loadtxt(f_rcts[iseq],delimiter=',') + gt_rcts = np.floor(fun_rct2ctr(gt_rcts0)) + + #### set peak map + target_sz = gt_rcts[0,2:] + window_sz,padding_h, padding_w = fun_get_search_window2(target_sz,im_sz,None,None) + + cell_size=np.prod(window_sz)/(fea_sz[0]*fea_sz[1]) + if cell_size == 0: + cell_size = 1 + print("cell_size:{}".format(cell_size)) + + pmap,pmap_ctr_h,pmap_ctr_w = fun_get_peak_map(window_sz,target_sz,cell_size,fea_sz,False) + assert(pmap.shape[0]==fea_sz[0] and pmap.shape[1]==fea_sz[1]) + str1 = "target_sz: [%d, %d], window_sz: [%d, %d], pmap.shape: [%d, %d], cellsize: [%d] " %(target_sz[0], target_sz[1],\ + window_sz[0], window_sz[1], pmap.shape[0], pmap.shape[1], cell_size) + logger.info(str1) + + prod_hw = fea_sz[1]*fea_sz[0] + y = np.expand_dims(pmap,axis=0) + y = 
np.expand_dims(y,axis=0) + yf = np.fft.fft2(y,axes=(-2,-1)) + + #### cos_win + cos_win = fun_cos_win(fea_sz[1],fea_sz[0],(1,fea_sz[1],fea_sz[0],1))*1.0e-3#/vgg_map_total + + #### + in_shape = (1,fea_sz[1],fea_sz[0],vgg_map_total) + vgg_fea = np.zeros(in_shape,dtype=np.float32) + #### + model_alphaf = np.zeros((nn_p,fea_sz[1],fea_sz[0]),dtype=np.complex128) + model_xf = np.zeros((nn_p,1,100,fea_sz[1],fea_sz[0]),dtype=np.complex128) + #### + pred_rcts = np.zeros((n_img,4),dtype=np.float32) + pred_rcts[0,:] = np.copy(gt_rcts[0,:]) + cur_rct = np.copy(pred_rcts[0,:]) + + save_fdr = '%s_%d' %(fdr,iseq) + save_path = os.path.join(cache_path,save_fdr) + if not os.path.isdir(save_path): + os.mkdir(save_path) + + ############################## extract feature ########################## + def extract_feature(ims,ctr_rcts,cos_win,padding_hw): + padding_h = padding_hw[0] + padding_w = padding_hw[1] + n = ctr_rcts.shape[0] + l = len(ims.shape) + # pdb.set_trace() + if l == 4: + assert(ims.shape[0]==n) + ## crop out patch + patches = [] + for ii in range(n): + # window_sz,_,_ = fun_get_search_window2(ctr_rcts[ii,2:],None,padding_h,padding_w) # ?? + window_sz,_,_ = fun_get_search_window2(ctr_rcts[ii,2:],None,padding_h,padding_w) + if l==4: + patch = fun_get_patch(np.copy(ims[ii]),ctr_rcts[ii,0:2],window_sz) + else: + # pdb.set_trace() + patch = fun_get_patch(np.copy(ims),ctr_rcts[ii,0:2],window_sz) + patches.append(patch) + patches = vgg_process_images(patches,**img_param) + + feed_dict = {vgg.images: patches, vgg.nscale: 1,vgg.cos_win:cos_win} + if n != 1: + feed_dict1 = {vgg.images: patches, vgg.nscale: 7,vgg.cos_win:cos_win} + vgg_fea1 = vgg_sess.run(vgg.vgg_fea1, feed_dict=feed_dict1) + else: + vgg_fea1 = vgg_sess.run(vgg.vgg_fea1, feed_dict=feed_dict) + return vgg_fea1 + + ######################### + nn_d = nn_p*pca_energy #vgg_map_total + nn_m = prod_hw + assert(nn_d%nn_p ==0) + nn_map = np.int32(nn_d/nn_p) + + #### + flag_occ = 0 + for jj in range(n_img): + #### sampling patch + im = np.copy(X0[jj,:,:,:]) + + ################################################################# + ##################### predict process ########################### + ################################################################# + if jj > 0: + # wsz,_,_ = fun_get_search_window2(cur_rct[2:],None,padding_h,padding_w) + wsz,padding_h,padding_w = fun_get_search_window2(cur_rct[2:],im_sz,None,None) + padding_hw = np.array([padding_h,padding_w],dtype = np.float32) + search_offset = fun_get_search_ctr(wsz,factor=0.4) + noffset = search_offset.shape[0] + nscale = search_scale.shape[0] + tmp_rcts = np.zeros((noffset*nscale,4)) + tmp_pred_rcts = np.zeros((n_img,nn_p,4),dtype = np.float32) + response = np.zeros((noffset,nn_p,nscale,fea_sz[1],fea_sz[0])) + + count = 0 + for ioffset in range(noffset): + ctr0 = np.floor(search_offset[ioffset,0:2] + cur_rct[0:2]+0.5) + for iscale in range(nscale): + tmp_rcts[count,0:2] = np.copy(ctr0) + tmp_rcts[count,2:] = cur_rct[2:]*search_scale[iscale,:] + count = count + 1 + if jj == 1: + test_vgg_fea = np.zeros((noffset*nscale,in_shape[1],in_shape[2],in_shape[3]),dtype=np.float32) + print("{}:{}:{}".format( search_scale, nscale, search_offset, noffset)) + test_vgg_fea = extract_feature(np.copy(im),tmp_rcts,cos_win,padding_hw) + + + feed_dict = {pca_te.is_mean:pca_is_mean,pca_te.is_norm:pca_is_norm,pca_te.x_mean:pca_x_mean,\ + pca_te.x_norm:pca_x_norm,pca_te.w:pca_w,pca_te.vgg_fea:test_vgg_fea} + vgg_fea_pca = pca_sess.run(pca_te.vgg_fea_pca,feed_dict=feed_dict) + + feed_dict = 
{res_map.vgg_fea_pca:vgg_fea_pca,res_map.model_alphaf:model_alphaf,res_map.model_xf:model_xf} + response = res_sess.run(res_map.response,feed_dict=feed_dict) + mx_offset = 0 #get_max_offset(response) + mx_scale,maxres = get_max_scale(response[mx_offset]) + mx_layer = 0 + mxres0 = np.zeros(nn_p) + mx_hh = np.zeros(nn_p) + mx_ww = np.zeros(nn_p) + for ilayer in range(nn_p): + mxres0[ilayer],mx_hh[ilayer],mx_ww[ilayer] = get_max_ps(response[mx_offset,mx_scale[ilayer],ilayer,:,:],pmap_ctr_h,pmap_ctr_w) + tmp_pred_rcts[jj,ilayer,2:] = cur_rct[2:]*search_scale[mx_scale[ilayer],:] + window_sz,_,_ = fun_get_search_window2(tmp_pred_rcts[jj,ilayer,2:],None,padding_h,padding_w) + ratio = 1.0 * window_sz/wsz + tmp_pred_rcts[jj,ilayer,2:] = cur_rct[2:]*ratio + tmp_pred_rcts[jj,ilayer,0:2] = cur_rct[0:2] + 1.0*np.asarray([mx_ww[ilayer],mx_hh[ilayer]])*window_sz/fea_sz + search_offset[mx_offset,:] + tmp_pred_rcts[jj,ilayer,:] = fun_border_modification(np.copy(tmp_pred_rcts[jj,ilayer,:]),im.shape[0],im.shape[1]) + pred_rcts[jj,:] = np.mean(tmp_pred_rcts[jj,:,:],axis = 0) + cur_rct = np.copy(pred_rcts[jj,:]) + + #### + str1 = "[%3d-%3d/%3d]:[%3.2f,%3.2f,%3.2f,%3.2f],[%3.2f,%3.2f,%3.2f,%3.2f],[%.2f,%.2f,%.2f,%.2f,%.2f,%.2f][%.2f,%.2f,%.2f], [%d, %.3f]\n\t\t[%d,%.4f,%.2f,%.2f]\n\t\t%s\n\t\t%s\n\t\t%s" %(jj,n_img,ifdr,\ + gt_rcts[jj,0],gt_rcts[jj,1],gt_rcts[jj,2],gt_rcts[jj,3],\ + pred_rcts[jj,0], pred_rcts[jj,1],pred_rcts[jj,2],pred_rcts[jj,3],\ + search_scale[mx_scale[0],0],search_scale[mx_scale[1],0],search_scale[mx_scale[2],0],\ + search_scale[mx_scale[3],0],search_scale[mx_scale[4],0],search_scale[mx_scale[5],0],\ + mx_offset,search_scale[mx_scale[0],0],search_scale[mx_scale[0],1],flag_occ,update_factor,\ + mx_layer,mxres0[0], mx_ww[0], mx_hh[0], \ + vector2string(mx_ww,'float'),\ + vector2string(mx_hh,'float'),\ + vector2string(mxres0,'float')) + + logger.info(str1) + flag_occ = 0 + + str1 = '%04d.jpg' %(jj) #'T_%d.jpg' + fname = os.path.join(save_path,str1) + fun_draw_rct_on_image(X0[jj,:,:],fname,gt_rcts[jj,:],None,pred_rcts[jj,:]) + + str1 = 'T_%d_mask.jpg' %(jj) + fname = os.path.join(save_path,str1) + + str1 = 'prop_tr_%s_%d_%d.mat' %(fdr,iseq,jj) + fname = os.path.join(save_path,str1) + + + ################################################################# + ########################### Preparing ########################### + ################################################################# + window_sz,padding_h,padding_w = fun_get_search_window2(cur_rct[2:],im_sz,None,None) + padding_hw = np.array([padding_h,padding_w],dtype=np.float32) + vgg_fea = extract_feature(np.copy(im),np.expand_dims(cur_rct,0),cos_win,padding_hw) + if jj == 0: # pca + pca_projections = fea_pca_tr(np.copy(vgg_fea[0]), nn_p, pca_energy, pca_is_mean, pca_is_norm) + pca_x_mean = np.zeros((nn_p,1,512),dtype = np.float32) + pca_x_norm = np.zeros((nn_p),dtype = np.float32) + pca_w = np.zeros((nn_p,512,pca_energy),dtype = np.float32) + for itracker in range(nn_p): + pca_x_mean[itracker,:,:] = pca_projections[itracker][1] + pca_x_norm[itracker] = pca_projections[itracker][3] + pca_w[itracker,:,:] = pca_projections[itracker][4] + vgg_fea2 = fea_pca_te(np.copy(vgg_fea[0]), nn_p, pca_projections) + + ################### update model ################################### + if (jj%cf_nframe_update == 0 or jj < 5): + for kk in range(nn_p): + vgg_fea2[kk]=np.reshape(vgg_fea2[kk],(fea_sz[1],fea_sz[0],-1)) + vgg_fea2[kk]=(np.expand_dims(vgg_fea2[kk],axis=0)).transpose(0,3,1,2) + xf = np.fft.fft2(vgg_fea2[kk],axes=(-2,-1)) + alphaf = 
fun_w(xf,yf,kernel_type,kernel_sigma,kernel_gamma) # h*w + if jj==0: + model_alphaf[kk] = np.copy(alphaf) + model_xf[kk,:,:,:,:] =np.copy(xf) + else: + model_alphaf[kk] = (1-update_factor)*model_alphaf[kk] + update_factor*alphaf + model_xf[kk] = (1-update_factor)*model_xf[kk] + update_factor*xf + + + ## save all results + #sess_tr.close() + #sess_te.close() + if jj==n_img-1: + pcs_loc_mean,pcs_loc_curv, pcs_loc_diff = fun_precision_location(pred_rcts,gt_rcts) + pcs_olp_mean,pcs_olp_curv, pcs_olp_diff = fun_precision_overlap(pred_rcts,gt_rcts) + str1 = '[%s, %d, %.3f]--[%.4f,%.4f]\n' %(fdr,iseq, update_factor, pcs_loc_mean,pcs_olp_mean),\ + np.array2string(pcs_loc_curv.transpose(),precision=4,separator=', '),\ + np.array2string(pcs_olp_curv.transpose(),precision=4,separator=', ') + logger.info(str1) + close_logger(logger) + + str1 = 'result_%s_%d_%.3f.mat' %(fdr,iseq,update_factor) + fname = os.path.join(cache_path,str1) + save_mat_file(fname,gt_rcts,pred_rcts,pcs_loc_diff,pcs_olp_diff) + + vgg_sess.close() + pca_sess.close() + res_sess.close() diff --git a/tracking/utis.py b/tracking/utis.py new file mode 100644 index 0000000..4300475 --- /dev/null +++ b/tracking/utis.py @@ -0,0 +1,798 @@ +''' +some utilities. +-- modified by Cui Zhen, April 18, 2016 +''' +# utis.py +############ function list ############## +# build_w_b(rng,n_input,n_out,str_type,factor=1.) +# build_w_b_kernel(rng,kernel,str_type,factor=1.) +# create_dir(dir_name,folder = None) +# elewise_mlp_list(a,b) +# elewise_div_list(a,b) +# fun_ctr2rct(ctrs) +# fun_rct2ctr(rcts) +# infer_pred_shape(gt_lmks,ref_point,ref_width) +# get_convpool_out_shape(inshape,kernel,stride,poolsize) +# get_conv_out_shape(inshape,kernel,stride) +# get_layers_lr_wgt_mul(netparams) +# get_net_layerNo(netparams,keyname) +# get_patch_warping(img,ctr,tmp_sz,win_sz) +# get_path(dir_name, folder) +# get_tuple_from_tuple(x,idx) +# kmeans_cluster(x,k,isDataNorm,isCtrNorm) +# locations_of_substring(string, substring) +# load_vars6_dumps(filepath,n) +# load_variable_list(filepath) +# matrix2string(x,datatype) +# net_params_parsing(netparams) +# net_params_print(netparams,logger) +# pca_te(x, w, x_mean = None, x_norm = None) +# pca_tr(x, energy = 0.9, is_mean = True, is_norm = True) +# read_filenames(fpath,filetype) +# read_image(fpath,isColor,isPad) +# resize_batch(x,nm_h,nm_w,interp_type) +# save_mat_file(filename,data1,data2,data3,data4) +# save_vars6_dumps(filepath,x1,x2,x3,x4,x5,x6) +# save_variable_list(x,filepath,is_shared) +# split_fpaths(fpathfiles) +# swap_columns(x,i1,i2) +# tf_process_images_for_vgg(ims,normal_height,normal_width,interp = 'bilinear') +# vector2string(x,datatype): + +######################################### + +import glob +import os +import pdb +import pickle + +import numpy as np +import scipy.cluster as cluster +import scipy.io +import scipy.misc as MISC +from PIL import Image + + +def pca_tr(x, energy=0.9, is_mean=True, is_norm=True): + # x: n * d + z = np.transpose(x) # d*n + + if is_mean: + x_mean = np.mean(z, axis=1, keepdims=True) + x_mean = np.asarray(x_mean, dtype=x.dtype) + z = z - x_mean + x_mean = np.transpose(x_mean) + else: + x_mean = 0 + + if is_norm: + x_norm = np.linalg.norm(z, axis=1, keepdims=True) + x_norm = np.asarray(x_norm, dtype=x.dtype) + idx = np.where(x_norm < 1.0e-6) + x_norm[idx] = 1. 
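+        # tiny norms were clamped to 1 just above, so the division below is
+        # safe for (near-)constant feature dimensions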
+ z = z / x_norm + x_norm = np.transpose(x_norm) + else: + x_norm = 1 + + # + d, n = z.shape + # pdb.set_trace() + if d >= n: + ztz = np.dot(np.transpose(z), z) + a, v = np.linalg.eig(ztz) + ind = np.argsort(a)[::-1] + ev = a[ind] + v = v[:, ind] + + if energy <= 1: + r = np.cumsum(ev) / np.sum(ev) + ind = np.where(r >= energy) + dim = ind[0] + else: + dim = energy + + a = ev[0:dim] + a = np.diag(1 / np.sqrt(a)) + v = v[:, 0:dim] + w = np.dot(np.dot(z, v), a) + else: + zzt = np.dot(z, np.transpose(z)) + a, v = np.linalg.eig(zzt) + ind = np.argsort(a)[::-1] + ev = a[ind] + v = v[:, ind] + + if energy <= 1: + r = np.cumsum(ev) / np.sum(ev) + ind = np.where(r >= energy) + dim = ind[0] + else: + dim = energy + + w = v[:, 0:dim] + + ## + w = np.asarray(w, dtype=x.dtype) + + return (is_mean, x_mean, is_norm, x_norm, w, ev, dim) + + +def pca_te(x, proj): + # x: n*d + # x_mean: 1*d + # x_norm: 1*d + # w: d*d2 + # output: n*d2 + is_mean, x_mean, is_norm, x_norm, w, _, _ = proj + if is_mean: + z = x - x_mean + else: + z = x + + if is_norm: + z = z / x_norm + + return np.dot(z, w) + + +def prep_image(im, normal_height, normal_width, normal_type, is_swap_axis, interp='bilinear'): + h, w, _ = im.shape + if normal_type == 'keep_aspect_ratio': + + if h != normal_height or w != normal_width: + r1 = 1. * normal_height / h + r2 = 1. * normal_width / w + if r1 > r2: + nh = normal_height + nw = np.floor(r1 * w + 0.5) + elif r2 > r1: + nw = normal_width + nh = np.floor(r2 * h + 0.5) + else: + nh = normal_height + nw = normal_width + nh = np.int32(nh) + nw = np.int32(nw) + im = MISC.imresize(im, [nh, nw], interp=interp) + + # Central crop + h, w, _ = im.shape + im = im[h // 2 - normal_height // 2:h // 2 + normal_height // + 2, w // 2 - normal_width // 2:w // 2 + normal_width // 2] + + elif normal_type == 'keep_all_content': + if h != normal_height or w != normal_width: + im = MISC.imresize(im, [normal_height, normal_width], interp='bilinear') + else: + print('normal_type error, please set or .') + + if is_swap_axis: + # Shuffle axes to c01 + im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1) + # Convert to BGR + im = im[::-1, :, :] + return im # floatX(im[np.newaxis]) + + +def create_dir(dir_name, folder=None): + if folder == None: + path = dir_name + else: + path = os.path.join(dir_name, folder) + + if not os.path.isdir(path): + os.mkdir(path) + + return path + + +def get_path(dir_name, folder): + + return os.path.join(dir_name, folder) + + +def resize_batch(x, nm_h, nm_w, interp_type): + # x: n*h*w*[c] + n = x.shape[0] + h = x.shape[1] + w = x.shape[2] + if h == nm_h and w == nm_w: + return x + if len(x.shape) == 4: + y = np.zeros((n, nm_h, nm_w, x.shape[3]), dtype=np.float32) + for ii in xrange(n): + y[ii, :, :, :] = MISC.imresize(x[ii], [nm_h, nm_w], interp=interp_type) + else: + y = np.zeros((n, nm_h, nm_w), dtype=np.float32) + for ii in xrange(n): + y[ii, :, :] = MISC.imresize(x[ii], [nm_h, nm_w], interp=interp_type) + return y + + +def get_patch_warping(img, ctr, tmp_sz, win_sz): + + img = np.float32(img) + if len(img.shape) == 3: + isColor = True + else: + isColor = False + h = img.shape[0] + w = img.shape[1] + + x = np.arange(1, win_sz[0] + 1) - win_sz[0] / 2 + 0.5 + y = np.arange(1, win_sz[1] + 1) - win_sz[1] / 2 + [x, y] = np.meshgrid(x, y) + p3 = tmp_sz[0] / win_sz[0] + # print p3,p3*tmp_sz[1]/win_sz[1] + yp = ctr[1] + y * (p3 * tmp_sz[1] / win_sz[1]) - 1 + xp = ctr[0] + x * p3 - 1 + + # save_mat_file('warping.mat',x,y,xp,yp) #?? 
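+    # (this is the same bilinear sampling as fun_get_patch_warping in
+    # funs_tracking.py; see the comments there)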
+ ## + x0 = np.int32(xp) + x1 = x0 + 1 + y0 = np.int32(yp) + y1 = y0 + 1 + + rx0 = xp - x0 + rx1 = 1 - rx0 + ry = yp - y0 + + # -- + + x0_bool = (x0 < 0) + (x0 > w - 1) + x1_bool = (x1 < 0) + (x1 > w - 1) + y0_bool = (y0 < 0) + (y0 > h - 1) + y1_bool = (y1 < 0) + (y1 > h - 1) + + x0[x0_bool] = 0 + x1[x1_bool] = 0 + y0[y0_bool] = 0 + y1[y1_bool] = 0 + + if isColor == True: + patch = np.zeros((win_sz[1], win_sz[0], 3)) + for ii in range(3): + patch[:, :, ii] = (rx1 * img[y0, x0, ii] * (~(y0_bool + x0_bool)) + rx0 * img[y0, x1, ii] * (~(y0_bool + x1_bool))) * (1 - ry) + \ + (rx1 * img[y1, x0, ii] * (~(y1_bool + x0_bool)) + rx0 * img[y1, x1, ii] * (~(y1_bool + x1_bool))) * ry + else: + patch = (rx1 * img[y0, x0] * (~(y0_bool + x0_bool)) + rx0 * img[y0, x1] * (~(y0_bool + x1_bool))) * (1 - ry) + \ + (rx1 * img[y1, x0] * (~(y1_bool + x0_bool)) + rx0 * img[y1, x1] * (~(y1_bool + x1_bool))) * ry + + patch[patch < 0] = 0 + patch[patch > 255] = 255 + return np.uint8(patch + 0.5) + + +def elewise_mlp_list(a, b): + if len(a) != len(b) and len(a) != 1 and len(b) != 1: + print('error: len(a)!=len(b).') + ab = None + elif len(a) == 1: + ab = [a[0] * b[i] for i in range(len(b))] + elif len(b) == 1: + ab = [a[i] * b[0] for i in range(len(a))] + else: + ab = [a[i] * b[i] for i in range(len(a))] + return ab + + +def elewise_div_list(a, b): + if len(a) != len(b) and len(a) != 1 and len(b) != 1: + print('error: len(a)!=len(b).') + ab = None + elif len(a) == 1: + ab = [a[0] / b[i] for i in range(len(b))] + elif len(b) == 1: + ab = [a[i] / b[0] for i in range(len(a))] + else: + ab = [a[i] / b[i] for i in range(len(a))] + return ab + + +def fun_rct2ctr(rcts): + # rcts: [w,h,wwidth,hwidth] + ctrs = np.copy(rcts) + ctrs[:, 0] = rcts[:, 0] + rcts[:, 2] / 2 + ctrs[:, 1] = rcts[:, 1] + rcts[:, 3] / 2 + return ctrs + + +def fun_ctr2rct(ctrs): + # ctrs: [cw,ch,wwidth,hwidth] + rcts = np.copy(ctrs) + rcts[:, 0] = ctrs[:, 0] - ctrs[:, 2] / 2 + rcts[:, 1] = ctrs[:, 1] - ctrs[:, 3] / 2 + return rcts + + +def read_image(fpath, isColor, isPad, padPos): + ''' + isColor: need produce rgb channels + isPad: when isColor == True: gray image (2D) --> color image (3D) + padPos: if isColor == True and isPad == True, for padPos ==0, we have output as c*h*w, for padPos == -1, h*w*c + ''' + if isColor: + img = np.array(Image.open(fpath)) + if len(img.shape) == 2 and isPad: + assert(padPos == 0 or padPos == -1) + if padPos == -1: + img = np.dstack((img, img, img)) + if padPos == 0: + img = np.expand_dims(img, axis=0) + img = np.concatenate((img, img, img), axis=padPos) + if len(img.shape) == 3: + assert(padPos == 0 or padPos == -1) + if padPos == 0: + img = np.moveaxes(img, [0, 1, 2], [2, 0, 1]) + else: + img = np.array(Image.open(fpath).convert('L')) + return img + + +def get_layers_lr_wgt_mul(netparams): + lr_mul = [] + wgt_mul = [] + for ii in xrange(len(netparams)): + if netparams[ii].has_key('lr_mul'): + lr_mul = np.append(lr_mul, netparams[ii]['lr_mul']) + wgt_mul = np.append(wgt_mul, netparams[ii]['weight_mul']) + lr_mul = np.float32(lr_mul) + wgt_mul = np.float32(wgt_mul) + return lr_mul, wgt_mul + + +def get_net_layerNo(netparams, keyname): + flag = False + for ii in xrange(len(netparams)): + if netparams[ii]['key'] == keyname: + flag = True + break + if flag: + return ii + else: + print(keyname) + return -2 + + +# use after parsing +def net_params_print(netparams, logger): + nlayer = len(netparams) + str = '-- net: %d layers --' % nlayer + logger.info(str) + str = '' + for ii in xrange(nlayer): + keyname = netparams[ii]['key'] 
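+        # one branch per layer type: each formats that layer's stored
+        # hyper-parameters into the summary string for the logger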
+# use after parsing
+def net_params_print(netparams, logger):
+    nlayer = len(netparams)
+    msg = '-- net: %d layers --' % nlayer
+    logger.info(msg)
+    msg = ''
+    for ii in range(nlayer):
+        keyname = netparams[ii]['key']
+        if netparams[ii]['name'] in ('conv', 'convlinear'):
+            kernel = netparams[ii]['kernel']
+            conv_stride = netparams[ii]['conv_stride']
+            conv_pad = netparams[ii]['conv_pad']
+
+            str_kernel = vector2string(kernel, 'int')
+            str_conv_stride = vector2string(conv_stride, 'int')
+            str_conv_pad = vector2string(conv_pad, 'int')
+            msg = msg + '\n\t**[%2d--%s] %s layer: \n\t\t<%s>\t<%s>\t<%s>' % (ii, keyname, netparams[ii]['name'],
+                                                                             str_kernel, str_conv_stride, str_conv_pad)
+        elif netparams[ii]['name'] == 'convpool':
+            kernel = netparams[ii]['kernel']
+            conv_stride = netparams[ii]['conv_stride']
+            conv_pad = netparams[ii]['conv_pad']
+            pool_size = netparams[ii]['pool_size']
+            pool_pad = netparams[ii]['pool_pad']
+
+            str_kernel = vector2string(kernel, 'int')
+            str_conv_stride = vector2string(conv_stride, 'int')
+            str_pool_size = vector2string(pool_size, 'int')
+            str_conv_pad = vector2string(conv_pad, 'int')
+            str_pool_pad = vector2string(pool_pad, 'int')
+            msg = msg + '\n\t**[%2d--%s] convpool layer: \n\t\t<%s>\t<%s>\t<%s>\n\t\t<%s>\t<%s>' % (ii, keyname, str_kernel,
+                                                                                                   str_conv_stride, str_conv_pad,
+                                                                                                   str_pool_size, str_pool_pad)
+        elif netparams[ii]['name'] == 'convdroppool':
+            kernel = netparams[ii]['kernel']
+            conv_stride = netparams[ii]['conv_stride']
+            conv_pad = netparams[ii]['conv_pad']
+            pool_size = netparams[ii]['pool_size']
+            pool_pad = netparams[ii]['pool_pad']
+            drop_rate = netparams[ii]['drop_rate']
+
+            str_kernel = vector2string(kernel, 'int')
+            str_conv_stride = vector2string(conv_stride, 'int')
+            str_pool_size = vector2string(pool_size, 'int')
+            str_conv_pad = vector2string(conv_pad, 'int')
+            str_pool_pad = vector2string(pool_pad, 'int')
+            str_drop_rate = '%.4f' % drop_rate
+            msg = msg + '\n\t**[%2d--%s] convdroppool layer: \n\t\t<%s>\t<%s>\t<%s>\n\t\t<%s>\t<%s>\n\t\t<%s>' % (ii, keyname, str_kernel,
+                                                                                                                 str_conv_stride, str_conv_pad,
+                                                                                                                 str_pool_size, str_pool_pad, str_drop_rate)
+        elif netparams[ii]['name'] == 'input':
+            msg = msg + '\n\t**%2d input layer:' % ii
+        elif netparams[ii]['name'] == 'flat':
+            msg = msg + '\n\t**%2d flat layer:' % ii
+        elif netparams[ii]['name'] == 'swapdim':
+            msg = msg + '\n\t**%2d swapdim layer:' % ii
+        elif netparams[ii]['name'] == 'rnn':
+            hid_dim = netparams[ii]['hid_dim']
+            msg = msg + '\n\t**[%2d--%s] rnn layer: \n\t\t<%4d> ' % (ii, keyname, hid_dim)
+        elif netparams[ii]['name'] in ('strnn', 'srnn'):
+            hid_dim = netparams[ii]['hid_dim']
+            # the original re-read 'hid_dim' here; 'layer' is the field the
+            # parsing code references for these layers
+            layername = netparams[ii]['layer']
+            direction = netparams[ii]['direction']
+            msg = msg + '\n\t**[%2d--%s] %s layer <%s>: \n\t\t<%s>\t<%4d> ' % (ii, keyname, netparams[ii]['name'],
+                                                                              layername, direction, hid_dim)
+        elif netparams[ii]['name'] == 'dropout':
+            drop_rate = netparams[ii]['drop_rate']
+            msg = msg + '\n\t**[%2d--%s] dropout layer: \n\t\t<%.4f> ' % (ii, keyname, drop_rate)
+        elif netparams[ii]['name'] == 'align':
+            hid_dim = netparams[ii]['hid_dim']
+            msg = msg + '\n\t**[%2d--%s] align layer: \n\t\t<%4d> ' % (ii, keyname, hid_dim)
+        elif netparams[ii]['name'] == 'channel':
+            msg = msg + '\n\t**[%2d--%s] channel layer: ' % (ii, keyname)
+        elif netparams[ii]['name'] == 'fc':
+            out_dim = netparams[ii]['out_dim']
+            msg = msg + '\n\t**[%2d--%s] fc layer: \n\t\t<%4d> ' % (ii, keyname, out_dim)
+        elif netparams[ii]['name'] == 'logreg':
+            class_num = netparams[ii]['class_num']
+            msg = msg + '\n\t**[%2d--%s] loss layer: \n\t\t<%4d> ' % (ii, keyname, class_num)
+        elif netparams[ii]['name'] == 'softmax':
+            msg = msg + '\n\t**[%2d--%s] softmax layer: ' % (ii, keyname)
+        elif netparams[ii]['name'] == 'meanshift':
+            kernel = netparams[ii]['kernel']
+            stride = (1, 1)
+
+            str_kernel = vector2string(kernel, 'int')
+            str_stride = vector2string(stride, 'int')
+            msg = msg + '\n\t**%2d meanshift layer: \n\t\t<%s>\n\t\t<%s>' % (ii, str_kernel, str_stride)
+        else:
+            print('error: unknown layer name %s' % netparams[ii]['name'])
+        str_in_shape = vector2string(netparams[ii]['in_shape'], 'int')
+        str_out_shape = vector2string(netparams[ii]['out_shape'], 'int')
+        toplayer = netparams[ii]['top']
+        msg = msg + '\n\t\t[ --%s ]\n\t\t--inshape: %s \n\t\t--outshape: %s ' % (toplayer, str_in_shape, str_out_shape)
+    logger.info(msg)
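+# Hypothetical usage sketch with this repo's Logger class (file name is a
+# placeholder). Kept as a comment so importing this module stays side-effect free:
+#
+#   from Logger import Logger
+#   logger = Logger(logname='net.log', loglevel=1, logger='vgg_cf').getlog()
+#   parsed = net_params_parsing(example_netparams)
+#   net_params_print(parsed, logger)   # one INFO record summarizing every layer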
+# netparams: list of dicts
+# input_shape: (batch_size, channel, height, width)
+def net_params_parsing(netparams):
+    nlayer = len(netparams)
+    for ii in range(nlayer):
+        if ii != 0:  # every non-input layer inherits its in_shape from its 'top' layer
+            kk = get_net_layerNo(netparams, netparams[ii]['top'])
+            netparams[ii]['in_shape'] = netparams[kk]['out_shape']
+        in_shape = netparams[ii]['in_shape']
+
+        if netparams[ii]['name'] == 'conv' or netparams[ii]['name'] == 'convlinear':
+            kernel = netparams[ii]['kernel']
+            conv_stride = netparams[ii]['conv_stride']
+            out_shape, conv_pad = get_conv_out_shape(in_shape, kernel, conv_stride)
+            netparams[ii]['conv_pad'] = conv_pad
+        elif netparams[ii]['name'] == 'convpool' or netparams[ii]['name'] == 'convdroppool':
+            kernel = netparams[ii]['kernel']
+            conv_stride = netparams[ii]['conv_stride']
+            pool_size = netparams[ii]['pool_size']
+            pool_stride = netparams[ii]['pool_stride']
+            out_shape, conv_pad, pool_pad = get_convpool_out_shape(in_shape, kernel, conv_stride, pool_size, pool_stride)
+            netparams[ii]['conv_pad'] = conv_pad
+            netparams[ii]['pool_pad'] = pool_pad
+        elif netparams[ii]['name'] == 'flat':
+            out_shape = (in_shape[0], np.prod(in_shape[1:]))
+        elif netparams[ii]['name'] == 'dropout':
+            out_shape = in_shape
+        elif netparams[ii]['name'] == 'rnn':
+            hid_dim = netparams[ii]['hid_dim']
+            out_shape = (in_shape[0], 1, in_shape[2], hid_dim)
+        elif netparams[ii]['name'] == 'strnn' or netparams[ii]['name'] == 'srnn':
+            hid_dim = netparams[ii]['hid_dim']
+            out_shape = (in_shape[0], hid_dim, in_shape[2], in_shape[3])
+        elif netparams[ii]['name'] == 'channel':
+            out_shape = (in_shape[0] * in_shape[2] * in_shape[3], in_shape[1])
+        elif netparams[ii]['name'] == 'align':
+            out_shape = (in_shape[0], in_shape[3])
+        elif netparams[ii]['name'] == 'swapdim':
+            out_shape = (in_shape[0], in_shape[2], in_shape[1], in_shape[3])
+        elif netparams[ii]['name'] == 'fc':
+            out_dim = netparams[ii]['out_dim']
+            out_shape = (in_shape[0], out_dim)
+        elif netparams[ii]['name'] == 'logreg':
+            class_num = netparams[ii]['class_num']
+            out_shape = (in_shape[0], class_num)
+        elif netparams[ii]['name'] == 'meanshift':
+            kernel = netparams[ii]['kernel']
+            stride = (1, 1)
+            pool_size = (1, 1)
+            # the original passed only four arguments and kept the 3-tuple return;
+            # get_convpool_out_shape also needs a pool stride
+            out_shape, _, _ = get_convpool_out_shape(in_shape, kernel, stride, pool_size, (1, 1))
+        elif netparams[ii]['name'] == 'input':
+            out_shape = in_shape
+        elif netparams[ii]['name'] == 'softmax':
+            out_shape = (in_shape[0], 1, in_shape[2], in_shape[3])
+        else:
+            print('error: unknown layer name %s' % netparams[ii]['name'])
+        netparams[ii]['out_shape'] = out_shape
+
+        # if 'layer' in netparams[ii] and 'layer' in netparams[ii - 1]:
+        #     if netparams[ii]['layer'] == netparams[ii - 1]['layer']:
+        #         in_shape = netparams[ii - 1]['in_shape']
+        #         out_shape = netparams[ii - 1]['out_shape']
+    return netparams
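+# Shape-bookkeeping sanity check (hypothetical numbers): for in_shape
+# (1, 3, 57, 57), kernel (64, 3, 3, 3) and stride (1, 1), the padding computed
+# below is (3 // 2, 3 // 2) = (1, 1), and the output height is
+# len(np.arange(0, 57 - 3 + 1 + 2 * 1, 1)) = 57, i.e. SAME-style padding:
+#
+#   out_shape, conv_pad = get_conv_out_shape((1, 3, 57, 57), (64, 3, 3, 3), (1, 1))
+#   assert out_shape == (1, 64, 57, 57) and conv_pad == (1, 1)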
+# os / glob / pickle / scipy.io / scipy.cluster may already arrive via
+# `from utis import *`; importing them here keeps the helpers below self-contained
+import os
+import glob
+import pickle
+import scipy.io
+from scipy import cluster
+
+
+def get_conv_out_shape(inshape, kernel, conv_stride):
+    # 'same'-style padding: half the kernel extent on each side
+    conv_pad = (kernel[2] // 2, kernel[3] // 2)
+    h = (np.arange(0, inshape[2] - kernel[2] + 1 + 2 * conv_pad[0], conv_stride[0])).shape[0]
+    w = (np.arange(0, inshape[3] - kernel[3] + 1 + 2 * conv_pad[1], conv_stride[1])).shape[0]
+    out_shape = (inshape[0], kernel[0], h, w)
+    return out_shape, conv_pad
+
+
+def get_convpool_out_shape(inshape, kernel, conv_stride, pool_size, pool_stride):
+    conv_pad = (kernel[2] // 2, kernel[3] // 2)
+    h = (np.arange(0, inshape[2] - kernel[2] + 1 + 2 * conv_pad[0], conv_stride[0])).shape[0]
+    w = (np.arange(0, inshape[3] - kernel[3] + 1 + 2 * conv_pad[1], conv_stride[1])).shape[0]
+    pool_pad = (0, 0)  # (pool_size[0] // 2, pool_size[1] // 2)
+    h = (np.arange(0, h - pool_size[0] + 1 + 2 * pool_pad[0], pool_stride[0])).shape[0]
+    w = (np.arange(0, w - pool_size[1] + 1 + 2 * pool_pad[1], pool_stride[1])).shape[0]
+    out_shape = (inshape[0], kernel[0], h, w)
+    return out_shape, conv_pad, pool_pad
+
+
+def matrix2string(x, datatype):
+    h = x.shape[0]
+    w = x.shape[1]
+    out = '\n[\n'
+    for ii in range(h):
+        for jj in range(w):
+            if datatype == 'int':
+                s = '%6d, ' % (x[ii, jj])
+            elif datatype == 'float':
+                s = '%6.4f, ' % (x[ii, jj])
+            out = out + s
+        out = out + '\n'
+    out = out + '\n]\n'
+    return out
+
+
+def vector2string(x, datatype):
+    n = len(x)
+    out = ''
+    for ii in range(n):
+        if datatype == 'int':
+            s = '%6d, ' % (x[ii])
+        elif datatype == 'float':
+            s = '%6.4f, ' % (x[ii])
+        out = out + s
+    return out
+
+
+def get_tuple_from_tuple(x, idx):
+    y = []
+    for ii in idx:
+        y.append(x[ii])
+    return y
+
+
+def locations_of_substring(string, substring):
+    """Return a list of locations of a substring."""
+
+    substring_length = len(substring)
+
+    def recurse(locations_found, start):
+        location = string.find(substring, start)
+        if location != -1:
+            return recurse(locations_found + [location], location + substring_length)
+        else:
+            return locations_found
+
+    return recurse([], 0)
+
+
+def read_filenames(fpath, filetype):
+    filepaths = glob.glob(fpath + '/*' + filetype)
+    nfilepath = len(filepaths)
+    filepaths = sorted(filepaths, key=str.lower)
+    _, files = split_fpaths(filepaths)
+    return filepaths, files, nfilepath
+
+
+def split_fpaths(fpathfiles):
+    n = len(fpathfiles)
+    fpaths = []
+    fnames = []
+    for ii in range(n):
+        fpath, fname = os.path.split(fpathfiles[ii])
+        fpaths.append(fpath)
+        fnames.append(fname)
+    return fpaths, fnames
+
+
+def save_mat_file(filename, data1, data2, data3, data4):
+    mdic = {'data1': data1}
+    if data2 is not None:
+        mdic['data2'] = data2
+    if data3 is not None:
+        mdic['data3'] = data3
+    if data4 is not None:
+        mdic['data4'] = data4
+    scipy.io.savemat(filename, mdic)
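+# Round-trip sketch for the .mat helper (the file name is a placeholder):
+#
+#   save_mat_file('responses.mat', np.zeros((57, 57)), None, None, None)
+#   back = scipy.io.loadmat('responses.mat')
+#   assert back['data1'].shape == (57, 57)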
+def save_variable_list(x, filepath, is_shared):
+    # is_shared == 1: x holds shared variables, dump their values (cPickle in the
+    # original; this project pins python==3.6, so use pickle)
+    if is_shared == 1:
+        y = []
+        for xi in x:
+            y.append(xi.get_value())
+        with open(filepath, "wb") as fp:
+            pickle.dump(y, fp, protocol=-1)
+    else:
+        with open(filepath, "wb") as fp:
+            pickle.dump(x, fp, protocol=-1)
+
+
+def load_variable_list(filepath):
+    with open(filepath, "rb") as fp:
+        y = pickle.load(fp)
+    return y
+
+
+def infer_pred_shape(gt_lmks, ref_point, ref_width):
+    n = gt_lmks.shape[0]
+    n_lmk = gt_lmks.shape[1] // 2  # integer division; '/' breaks the shape below under Python 3
+    idx_h = np.arange(0, n_lmk * 2, 2)
+    idx_w = np.arange(1, n_lmk * 2, 2)
+    rand_idx = np.random.permutation(n)
+    pred_lmks = np.zeros((n, n_lmk * 2), dtype=np.float32)
+    pred_lmks[:, idx_h] = (gt_lmks[:, idx_h] - ref_point[:, 0].reshape(-1, 1)) / ref_width
+    pred_lmks[:, idx_w] = (gt_lmks[:, idx_w] - ref_point[:, 1].reshape(-1, 1)) / ref_width
+    pred_lmks = pred_lmks[rand_idx, :]
+    pred_lmks[:, idx_h] = pred_lmks[:, idx_h] * ref_width + ref_point[:, 0].reshape(-1, 1)
+    pred_lmks[:, idx_w] = pred_lmks[:, idx_w] * ref_width + ref_point[:, 1].reshape(-1, 1)
+
+    return pred_lmks
+
+
+def kmeans_cluster(x, k, isDataNorm, isCtrNorm):
+    # x: n*d
+    if isDataNorm:
+        nm = np.sqrt(np.sum(x**2, axis=-1, keepdims=True))
+        x = x / nm
+    # no whiten
+    centroid, label = cluster.vq.kmeans2(x, k, iter=500, minit='points')
+    if isCtrNorm:
+        nm = np.sqrt(np.sum(centroid**2, axis=-1, keepdims=True))
+        centroid = centroid / nm
+    return centroid, label
+
+##
+
+
+def swap_columns(x, i1, i2):
+    temp = np.copy(x[:, i1])
+    x[:, i1] = x[:, i2]
+    x[:, i2] = temp
+    return x
+
+
+def save_vars6_dumps(filepath, x1, x2, x3, x4, x5, x6):
+    with open(filepath, "wb") as fp:
+        pickle.dump(x1, fp, protocol=-1)
+        if x2 is not None:
+            pickle.dump(x2, fp, protocol=-1)
+        if x3 is not None:
+            pickle.dump(x3, fp, protocol=-1)
+        if x4 is not None:
+            pickle.dump(x4, fp, protocol=-1)
+        if x5 is not None:
+            pickle.dump(x5, fp, protocol=-1)
+        if x6 is not None:
+            pickle.dump(x6, fp, protocol=-1)
+
+
+def load_vars6_dumps(filepath, n):
+    # read back the first n objects written by save_vars6_dumps
+    with open(filepath, "rb") as fp:
+        xs = [pickle.load(fp) for _ in range(n)]
+    if n == 1:
+        return xs[0]
+    return tuple(xs)
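+# Round-trip sketch (file name and variables are placeholders):
+#
+#   save_vars6_dumps('state.pkl', W, b, None, None, None, None)
+#   W2, b2 = load_vars6_dumps('state.pkl', 2)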
+def build_w_b_kernel(rng, kernel, str_type, factor=1.):
+    # leftover Theano-era initializer kept for reference; dtype switched from
+    # theano.config.floatX to float32, since theano is not a dependency here
+    fan_in = np.prod(kernel[1:])
+    fan_out = kernel[0] * np.prod(kernel[2:])
+
+    if str_type == 'uniform':
+        thre = np.sqrt(6. / (fan_in + fan_out)) * factor
+        W = np.asarray(rng.uniform(size=kernel, low=-thre, high=thre), dtype=np.float32)
+        b = np.zeros((kernel[0],), dtype=np.float32)
+    elif str_type == 'gaussian':
+        W = np.asarray(rng.normal(loc=factor[0], scale=factor[1], size=kernel), dtype=np.float32)
+        b = np.zeros((kernel[0],), dtype=np.float32)
+    return W, b
+
+
+'''
+    For a :class:`DenseLayer`, if ``gain='relu'`` and ``initializer=Uniform``,
+    the weights are initialized as
+    .. math::
+        a &= \\sqrt{\\frac{12}{fan_{in}+fan_{out}}}\\\\
+        W &\\sim U[-a, a]
+    If ``gain=1`` and ``initializer=Normal``, the weights are initialized as
+    .. math::
+        \\sigma &= \\sqrt{\\frac{2}{fan_{in}+fan_{out}}}\\\\
+        W &\\sim N(0, \\sigma)
+'''
+
+
+def build_w_b(rng, n_in, n_out, str_type, factor):
+    if str_type == 'uniform':
+        thre = np.sqrt(12. / (n_in + n_out)) * factor
+        W = np.asarray(rng.uniform(size=(n_in, n_out), low=-thre, high=thre), dtype=np.float32)
+        b = np.zeros((n_out,), dtype=np.float32)
+    elif str_type == 'zeros':
+        W = np.asarray(np.zeros((n_in, n_out)), dtype=np.float32)
+        b = np.zeros((n_out,), dtype=np.float32)
+    elif str_type == 'identity':
+        thre = factor
+        W = np.asarray(np.eye(n_in) * thre, dtype=np.float32)
+        b = np.zeros((n_out,), dtype=np.float32)
+    elif str_type == 'gaussian':
+        W = np.asarray(rng.normal(loc=factor[0], scale=factor[1], size=(n_in, n_out)), dtype=np.float32)
+        b = np.zeros((n_out,), dtype=np.float32)
+    return W, b
diff --git a/tracking/vgg19_tf.py b/tracking/vgg19_tf.py
new file mode 100644
index 0000000..7b68894
--- /dev/null
+++ b/tracking/vgg19_tf.py
@@ -0,0 +1,177 @@
+import os
+import tensorflow as tf
+
+import numpy as np
+import time
+import inspect
+
+VGG_MEAN = [103.939, 116.779, 123.68]  # BGR channel means on ImageNet
+downscale_size = [57, 57]              # must match fea_sz in configer.py
+
+
+class Vgg19:
+    def __init__(self, vgg19_npy_path=None, inds_outlayers=[19]):
+        if vgg19_npy_path is None:
+            path = inspect.getfile(Vgg19)
+            path = os.path.abspath(os.path.join(path, os.pardir))
+            path = os.path.join(path, "vgg19.npy")
+            vgg19_npy_path = path
+            print(vgg19_npy_path)
+
+        # allow_pickle is required on newer numpy; older versions accept it too
+        self.data_dict = np.load(vgg19_npy_path, encoding='latin1', allow_pickle=True).item()
+        print("npy file loaded")
+
+        self.images = tf.placeholder("float", [None, 224, 224, 3])
+        self.nscale = tf.placeholder("int32")
+        self.cos_win = tf.placeholder("float", [1, 57, 57, 1])
+        with tf.name_scope("content_vgg"):
+            self.build(self.images)
+
+        self.get_fea_layers(inds_outlayers)
+        self.resize_map(downscale_size, vgg_fea=None)
+
+    def get_fea_layers(self, inds_outlayers):
+        # inds_outlayers holds 1-based indices into the 20 tensors below
+        all_layers = [self.conv1_1, self.conv1_2, self.conv2_1, self.conv2_2,
+                      self.conv3_1, self.conv3_2, self.conv3_3, self.conv3_4,
+                      self.conv4_1, self.conv4_2, self.conv4_3, self.conv4_4,
+                      self.conv5_1, self.conv5_2, self.conv5_3, self.conv5_4,
+                      self.fc6, self.fc7, self.fc8, self.prob]
+        self.mapnums = np.asarray((64, 64, 128, 128, 256, 256, 256, 256, 512, 512, 512, 512,
+                                   512, 512, 512, 512, 4096, 4096, 1000, 1000))
+
+        self.out_layers = []
+        self.out_num_maps = []
+        for ix in inds_outlayers:
+            self.out_layers.append(all_layers[ix - 1])
+            self.out_num_maps.append(self.mapnums[ix - 1])
+
+        # cumulative channel offsets of the selected layers in the stacked feature
+        self.out_num_maps = np.asarray(self.out_num_maps)
+        x = np.cumsum(self.out_num_maps)
+        self.out_map_nlayer = len(x)
+        self.out_map_idx = np.zeros(self.out_map_nlayer + 1, dtype=np.int32)
+        self.out_map_idx[1:] = x
+        self.out_map_total = x[-1]
+
+    def resize_map(self, downscale_size, vgg_fea=None):
+        # resize every selected map stack to downscale_size, concatenate on channels
+        self.high_res_patch = self.out_layers  # list of [1, h_i, w_i, c_i] tensors
+        outx = []
+        if vgg_fea is None:
+            self.vgg_fea = tf.zeros([self.nscale, downscale_size[0], downscale_size[1], self.out_map_total], dtype=tf.float32)
+            self.vgg_fea1 = tf.zeros([self.nscale, downscale_size[0], downscale_size[1], self.out_map_total], dtype=tf.float32)
+        for hmap in self.high_res_patch:
+            # method=2 is bicubic resampling
+            self.resize_cubic = tf.image.resize_images(hmap, [downscale_size[0], downscale_size[1]], method=2)
+            outx.append(self.resize_cubic)
+        outx = tf.concat(outx, 3)
+        self.vgg_fea = outx
+        self.vgg_fea1 = tf.multiply(self.vgg_fea, self.cos_win)  # cosine-windowed features
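+    # Hypothetical end-to-end sketch (path and variable names are placeholders,
+    # not the repo's tracker loop):
+    #
+    #   vgg = Vgg19('/path/to/vgg19.npy', inds_outlayers=[10, 11, 12, 14, 15, 16])
+    #   with tf.Session() as sess:
+    #       fea = sess.run(vgg.vgg_fea1, feed_dict={
+    #           vgg.images: batch,            # [n, 224, 224, 3], RGB in [0, 255]
+    #           vgg.nscale: batch.shape[0],
+    #           vgg.cos_win: cos_win})        # [1, 57, 57, 1] cosine window
+    #   # fea: [n, 57, 57, sum of the selected layers' channel counts]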
+    def build(self, rgb):
+        """
+        load variables from the npy file to build the VGG graph
+
+        :param rgb: image batch [batch, 224, 224, 3] in RGB order; the mean
+            subtraction below assumes values in [0, 255], not [0, 1]
+        """
+
+        start_time = time.time()
+        print("build model started")
+        rgb_scaled = rgb
+
+        # Convert RGB to BGR and subtract the ImageNet channel means
+        red, green, blue = tf.split(rgb_scaled, 3, 3)
+        assert red.get_shape().as_list()[1:] == [224, 224, 1]
+        assert green.get_shape().as_list()[1:] == [224, 224, 1]
+        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
+        bgr = tf.concat([
+            blue - VGG_MEAN[0],
+            green - VGG_MEAN[1],
+            red - VGG_MEAN[2],
+        ], 3)
+        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]
+
+        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
+        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
+        self.pool1 = self.max_pool(self.conv1_2, 'pool1')
+
+        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
+        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
+        self.pool2 = self.max_pool(self.conv2_2, 'pool2')
+
+        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
+        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
+        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
+        self.conv3_4 = self.conv_layer(self.conv3_3, "conv3_4")
+        self.pool3 = self.max_pool(self.conv3_4, 'pool3')
+
+        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
+        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
+        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
+        self.conv4_4 = self.conv_layer(self.conv4_3, "conv4_4")
+        self.pool4 = self.max_pool(self.conv4_4, 'pool4')
+
+        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
+        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
+        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
+        self.conv5_4 = self.conv_layer(self.conv5_3, "conv5_4")
+        self.pool5 = self.max_pool(self.conv5_4, 'pool5')
+
+        self.fc6 = self.fc_layer(self.pool5, "fc6")
+        assert self.fc6.get_shape().as_list()[1:] == [4096]
+        self.relu6 = tf.nn.relu(self.fc6)
+
+        self.fc7 = self.fc_layer(self.relu6, "fc7")
+        self.relu7 = tf.nn.relu(self.fc7)
+
+        self.fc8 = self.fc_layer(self.relu7, "fc8")
+
+        self.prob = tf.nn.softmax(self.fc8, name="prob")
+
+        self.data_dict = None
+        print("build model finished: %ds" % (time.time() - start_time))
+
+    def avg_pool(self, bottom, name):
+        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def max_pool(self, bottom, name):
+        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)
+
+    def conv_layer(self, bottom, name):
+        with tf.variable_scope(name):
+            filt = self.get_conv_filter(name)
+
+            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')
+
+            conv_biases = self.get_bias(name)
+            bias = tf.nn.bias_add(conv, conv_biases)
+
+            relu = tf.nn.relu(bias)
+            return relu
+
+    def fc_layer(self, bottom, name):
+        with tf.variable_scope(name):
+            shape = bottom.get_shape().as_list()
+            dim = 1
+            for d in shape[1:]:
+                dim *= d
+            x = tf.reshape(bottom, [-1, dim])
+
+            weights = self.get_fc_weight(name)
+            biases = self.get_bias(name)
+
+            # Fully connected layer. Note that the '+' operation automatically
+            # broadcasts the biases.
+            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)
+
+            return fc
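+    # Shape check for the flattening above: pool5 on a 224x224 input is
+    # [n, 7, 7, 512], so dim = 7 * 7 * 512 = 25088 and the fc6 weight matrix
+    # stored in vgg19.npy is [25088, 4096].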
+    def get_conv_filter(self, name):
+        return tf.constant(self.data_dict[name][0], name="filter")
+
+    def get_bias(self, name):
+        return tf.constant(self.data_dict[name][1], name="biases")
+
+    def get_fc_weight(self, name):
+        return tf.constant(self.data_dict[name][0], name="weights")
diff --git a/tracking/vgg_utis.py b/tracking/vgg_utis.py
new file mode 100644
index 0000000..66e9b42
--- /dev/null
+++ b/tracking/vgg_utis.py
@@ -0,0 +1,188 @@
+import skimage
+import skimage.io
+import skimage.transform
+import numpy as np
+import scipy.misc as MISC
+from scipy import interpolate
+
+
+def vgg_resize_maps(map_list, normal_hw, interp='bilinear', outx=None):
+    # map_list: list of 4d tensors [n, h, w, c_i]
+    # outx: 4d tensor, n*nh*nw*\sum{c_i}; allocated here when not supplied
+    nmh, nmw = normal_hw
+    assert(type(map_list) is list)
+    n = map_list[0].shape[0]
+    total_map = 0
+    for imap in map_list:
+        total_map += imap.shape[-1]
+
+    if outx is None:
+        outx = np.zeros((n, nmh, nmw, total_map), dtype=np.float32)
+    count = 0
+    if interp == 'bilinear':
+        ip_type = 'linear'
+    elif interp == 'bicubic':
+        ip_type = 'cubic'
+    else:
+        raise ValueError("interp must be 'bilinear' or 'bicubic'")
+
+    for imap in map_list:
+        n, h, w, c = imap.shape
+
+        h0_seq = np.arange(0, nmh - 1.0e-6, nmh * 1. / h)
+        w0_seq = np.arange(0, nmw - 1.0e-6, nmw * 1. / w)
+        h1_seq = np.arange(nmh)
+        w1_seq = np.arange(nmw)
+
+        for jj in range(c):
+            for ii in range(n):
+                f = interpolate.interp2d(w0_seq, h0_seq, imap[ii, :, :, jj], kind=ip_type)
+                outx[ii, :, :, count] = f(w1_seq, h1_seq)
+            count += 1
+
+    # the original dropped the result whenever outx was allocated internally
+    return outx
+
+
+def vgg_resize_image(im, nh, nw, interp_tool='misc', interp='bilinear'):
+    if interp_tool == 'misc':
+        im = MISC.imresize(im, [nh, nw], interp=interp)
+    elif interp_tool == 'skimage':
+        im = im / 255.0
+        assert (0 <= im).all() and (im <= 1.0).all()
+        im = skimage.transform.resize(im, (nh, nw))
+        im = im * 255.0
+    else:
+        raise ValueError("interp_tool must be 'misc' or 'skimage'")
+    return im
+
+
+def vgg_process_one_image(im, normal_height, normal_width, normal_type, is_swap_axis, interp_tool='misc', interp='bilinear'):
+    h = im.shape[0]
+    w = im.shape[1]
+    if normal_type == 'keep_aspect_ratio':
+        if h != normal_height or w != normal_width:
+            r1 = 1. * normal_height / h
+            r2 = 1. * normal_width / w
+            # scale by the larger ratio so both sides cover the target, then crop
+            if r1 > r2:
+                nh = normal_height
+                nw = np.floor(r1 * w + 0.5)
+            elif r2 > r1:
+                nw = normal_width
+                nh = np.floor(r2 * h + 0.5)
+            else:
+                nh = normal_height
+                nw = normal_width
+            nh = np.int32(nh)
+            nw = np.int32(nw)
+
+            im = vgg_resize_image(im, nh, nw, interp_tool, interp)
+
+            # Central crop
+            h, w, _ = im.shape
+            im = im[h // 2 - normal_height // 2:h // 2 + normal_height // 2,
+                    w // 2 - normal_width // 2:w // 2 + normal_width // 2]
+    elif normal_type == 'keep_all_content':
+        if h != normal_height or w != normal_width:
+            im = vgg_resize_image(im, normal_height, normal_width, interp_tool, interp)
+    else:
+        print("normal_type error, please set 'keep_aspect_ratio' or 'keep_all_content'.")
+
+    if is_swap_axis:
+        # Shuffle axes to c01
+        im = np.swapaxes(np.swapaxes(im, 1, 2), 0, 1)
+        # Convert to BGR
+        im = im[::-1, :, :]
+    return im
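+# Resizing sketch with made-up shapes: two conv stacks at different resolutions
+# fuse into one [n, 57, 57, 768] block, mirroring how the tracker stacks VGG layers:
+#
+#   m1 = np.random.rand(1, 28, 28, 512).astype(np.float32)
+#   m2 = np.random.rand(1, 14, 14, 256).astype(np.float32)
+#   fused = vgg_resize_maps([m1, m2], (57, 57), interp='bilinear')
+#   assert fused.shape == (1, 57, 57, 768)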
+def vgg_process_images(ims, normal_hw, normal_type='keep_aspect_ratio', interp_tool='misc', interp='bilinear'):
+    # ims: list of 3d tensors [h, w, c] or 4d tensor [n, h, w, c]
+    # ims_out: 4d tensor, n*nh*nw*nc
+    normal_height, normal_width = normal_hw
+    flag = type(ims) is list
+    if flag:
+        n = len(ims)
+        c = ims[0].shape[-1]
+    else:
+        n, _, _, c = ims.shape
+    ims_out = np.zeros((n, normal_height, normal_width, c), dtype=np.float32)
+    for ii in range(n):
+        ims_out[ii] = vgg_process_one_image(ims[ii], normal_height, normal_width, normal_type, False, interp_tool, interp)
+
+    return ims_out
+
+
+# returns image of shape [224, 224, 3]
+# [height, width, depth]
+def load_image(path):
+    # load image
+    img = skimage.io.imread(path)
+    img = img / 255.0
+    assert (0 <= img).all() and (img <= 1.0).all()
+    # crop the largest central square, then resize to 224x224
+    short_edge = min(img.shape[:2])
+    yy = int((img.shape[0] - short_edge) / 2)
+    xx = int((img.shape[1] - short_edge) / 2)
+    crop_img = img[yy: yy + short_edge, xx: xx + short_edge]
+    resized_img = skimage.transform.resize(crop_img, (224, 224))
+    return resized_img
+
+
+# returns the top1 string
+def print_prob(prob, file_path):
+    synset = [l.strip() for l in open(file_path).readlines()]
+
+    pred = np.argsort(prob)[::-1]
+
+    # Get top1 label
+    top1 = synset[pred[0]]
+    print("Top1: ", top1, prob[pred[0]])
+    # Get top5 label
+    top5 = [(synset[pred[i]], prob[pred[i]]) for i in range(5)]
+    print("Top5: ", top5)
+    return top1
+
+
+def load_image2(path, height=None, width=None):
+    # load image, optionally resizing while preserving the aspect ratio
+    img = skimage.io.imread(path)
+    img = img / 255.0
+    if height is not None and width is not None:
+        ny = height
+        nx = width
+    elif height is not None:
+        ny = height
+        nx = int(round(img.shape[1] * ny / img.shape[0]))  # keep sizes integral for resize
+    elif width is not None:
+        nx = width
+        ny = int(round(img.shape[0] * nx / img.shape[1]))
+    else:
+        ny = img.shape[0]
+        nx = img.shape[1]
+    return skimage.transform.resize(img, (ny, nx))
+
+
+def test():
+    img = skimage.io.imread("./test_data/starry_night.jpg")
+    ny = 300
+    nx = int(round(img.shape[1] * ny / img.shape[0]))  # float sizes break resize
+    img = skimage.transform.resize(img, (ny, nx))
+    skimage.io.imsave("./test_data/test/output.jpg", img)
+
+
+if __name__ == "__main__":
+    test()
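+# Preprocessing sketch (file names are placeholders): resize two frames to the
+# 224x224 VGG input without cropping, as configer.py's 'keep_all_content' does:
+#
+#   frames = [skimage.io.imread(p) for p in ('frame0001.jpg', 'frame0002.jpg')]
+#   batch = vgg_process_images(frames, (224, 224), normal_type='keep_all_content')
+#   # batch: float32 [2, 224, 224, 3], still in [0, 255] for Vgg19.build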