# pca.py - Principal component analysis (PCA) of a small sample dataset using NumPy.
import numpy as np
# Principal component analysis (PCA) via eigendecomposition of the covariance
# matrix. The script centers the data, finds the directions of greatest
# variance, projects the data onto the leading ones, and reports how much of
# the total variance those directions retain.

# Sample data (replace with your dataset).
# Rows are observations (samples); columns are features (variables).
# NOTE: the sample rows all lie on one straight line (each row is the previous
# one plus [3, 3, 3]), so only the first principal component carries variance.
data = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
    [10, 11, 12],
])
if data.ndim != 2 or data.shape[0] < 2:
    # With fewer than two rows the sample covariance (normalized by N - 1)
    # is undefined and np.cov would only emit a warning and return NaNs.
    raise ValueError("data must be a 2-D array with at least two rows (samples)")
num_features = data.shape[1]

# Step 1: Mean centering (subtract each column's mean from that column)
mean = np.mean(data, axis=0)
centered_data = data - mean

# Step 2: Compute the covariance matrix (rowvar=False: columns are variables).
# np.cov squeezes its result, so a single-feature dataset would yield a 0-d
# array; atleast_2d keeps the (num_features, num_features) shape eigh expects.
cov_matrix = np.atleast_2d(np.cov(centered_data, rowvar=False))

# Step 3: Eigendecomposition of the covariance matrix.
# eigh is the solver for symmetric matrices; it returns real eigenvalues in
# ascending order with the matching eigenvectors as columns.
eigenvalues, eigenvectors = np.linalg.eigh(cov_matrix)

# Sort eigenvectors by decreasing eigenvalues
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

# A covariance matrix is positive semi-definite, so any negative eigenvalue is
# floating-point round-off (typical for rank-deficient data such as the sample
# above). Clamp to zero so the variance sums below are not skewed.
eigenvalues = np.clip(eigenvalues, 0.0, None)

# Step 4: Select a subset of eigenvectors (principal components)
num_components = 2  # Choose the number of components to keep
if not 1 <= num_components <= num_features:
    # Without this check a negative value would silently slice off the wrong
    # columns and a too-large value would silently be truncated.
    raise ValueError(
        f"num_components must be between 1 and {num_features}, "
        f"got {num_components}"
    )
selected_eigenvectors = eigenvectors[:, :num_components]

# Step 5: Project the data onto the selected principal components
reduced_data = centered_data @ selected_eigenvectors
# Now, 'reduced_data' contains the data in reduced dimensions:
# one row per sample, one column per kept component.

# You can also calculate the variance explained by the selected components
total_variance = np.sum(eigenvalues)
explained_variance = np.sum(eigenvalues[:num_components])
# Constant data has zero total variance; report 0.0 (nothing to explain)
# instead of dividing by zero and printing nan.
explained_variance_ratio = (
    explained_variance / total_variance if total_variance > 0 else 0.0
)
print(f"Explained Variance Ratio: {explained_variance_ratio:.4f}")