-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathPCA.m
116 lines (97 loc) · 3.34 KB
/
PCA.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
function [eigvector, eigvalue, elapse,sampleMean] = PCA(data, ReducedDim)
%PCA Principal Component Analysis
%
% Usage:
% [eigvector, eigvalue] = PCA(data, ReducedDim)
% [eigvector, eigvalue] = PCA(data)
%
% Input:
% data - Data matrix. Each row vector of fea is a data point.
%
% ReducedDim - The dimensionality of the reduced subspace. If 0,
% all the dimensions will be kept.
% Default is 0.
%
% Output:
% eigvector - Each column is an embedding function, for a new
% data point (row vector) x, y = x*eigvector
% will be the embedding result of x.
% eigvalue - The sorted eigvalue of PCA eigen-problem.
%
% Examples:
% fea = rand(7,10);
% [eigvector,eigvalue] = PCA(fea,4);
% Y = fea*eigvector;
%
%
% version 2.1 --June/2007
% version 2.0 --May/2007
% version 1.1 --Feb/2006
% version 1.0 --April/2004
%
% Written by Deng Cai (dengcai2 AT cs.uiuc.edu)
%
if (~exist('ReducedDim','var'))
ReducedDim = 0;
end
[nSmp,nFea] = size(data);
if (ReducedDim > nFea) | (ReducedDim <=0)
ReducedDim = nFea;
end
tmp_T = cputime;
if issparse(data)
data = full(data);
end
sampleMean = mean(data,1);
data = (data - repmat(sampleMean,nSmp,1));
if nFea/nSmp > 1.0713
% This is an efficient method which computes the eigvectors of
% of A*A^T (instead of A^T*A) first, and then convert them back to
% the eigenvectors of A^T*A.
ddata = data*data';
ddata = max(ddata, ddata');
dimMatrix = size(ddata,2);
if dimMatrix > 1000 & ReducedDim < dimMatrix/10 % using eigs to speed up!
option = struct('disp',0);
[eigvector, eigvalue] = eigs(ddata,ReducedDim,'la',option);
eigvalue = diag(eigvalue);
else
[eigvector, eigvalue] = eig(ddata);
eigvalue = diag(eigvalue);
[junk, index] = sort(-eigvalue);
eigvalue = eigvalue(index);
eigvector = eigvector(:, index);
end
clear ddata;
maxEigValue = max(abs(eigvalue));
eigIdx = find(abs(eigvalue)/maxEigValue < 1e-12);
eigvalue (eigIdx) = [];
eigvector (:,eigIdx) = [];
eigvector = data'*eigvector; % Eigenvectors of A^T*A
eigvector = eigvector*diag(1./(sum(eigvector.^2).^0.5)); % Normalization
else
ddata = data'*data;
ddata = max(ddata, ddata');
dimMatrix = size(ddata,2);
if dimMatrix > 1000 & ReducedDim < dimMatrix/10 % using eigs to speed up!
option = struct('disp',0);
[eigvector, eigvalue] = eigs(ddata,eigvector_n,'la',option);
eigvalue = diag(eigvalue);
else
[eigvector, eigvalue] = eig(ddata);
eigvalue = diag(eigvalue);
[junk, index] = sort(-eigvalue);
eigvalue = eigvalue(index);
eigvector = eigvector(:, index);
end
clear ddata;
maxEigValue = max(abs(eigvalue));
eigIdx = find(abs(eigvalue)/maxEigValue < 1e-12);
eigvalue (eigIdx) = [];
eigvector (:,eigIdx) = [];
end
if ReducedDim < length(eigvalue)
eigvalue = eigvalue(1:ReducedDim);
eigvector = eigvector(:, 1:ReducedDim);
end
elapse = cputime - tmp_T;