# gaussian process - kansanshi.py
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats as stats
import theano
import theano.tensor as tt
import pymc3 as pm
from pymc3.gp.util import plot_gp_dist
from sklearn.metrics import mean_squared_error
# theano.config.optimizer = 'None'
# np.random.seed(20090425)
# load the multiscale data: column 'x' is log10 tonnage scale, 'y' is log10 variance
data = pd.read_csv("C:\\Users\\NIU004\\OneDrive - CSIRO\\Desktop\\Mineral sorting\\Kansanshi\\multiscale.csv")
X = np.array(data['x']).reshape(-1, 1)  # pymc3 GPs expect a 2-D (n, 1) input array
y = np.array(data['y'])
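# quick sanity check: X should be (n, 1) and y (n,) for gp.marginal_likelihood below
print(X.shape, y.shape)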
# fig = plt.figure(figsize=(14,5)); ax = fig.add_subplot(111)
# ax.plot(X, y, 'ok', ms=10);
# ax.set_xlabel("partial");
# ax.set_ylabel("entire");
# np.random.seed(20090425)
# n = 20
# X = np.sort(3*np.random.rand(n))[:,None]
# with pm.Model() as model:
# # f(x)
# l_true = 0.3
# s2_f_true = 1.0
# cov = s2_f_true * pm.gp.cov.ExpQuad(1, l_true)
# # noise, epsilon
# s2_n_true = 0.1
# K_noise = s2_n_true**2 * tt.eye(n)
# K = cov(X) + K_noise
# # evaluate the covariance with the given hyperparameters
# K = theano.function([], cov(X) + K_noise)()
# # generate fake data from GP with white noise (with variance sigma2)
# y = np.random.multivariate_normal(np.zeros(n), K)
# with pm.Model() as model:
# ℓ = pm.Uniform("ℓ", lower=0,upper=2.5)
# η = pm.Uniform("η", lower=0,upper=0.5)
# cov = η ** 2 * pm.gp.cov.Matern52(1, ℓ)
# gp = pm.gp.Marginal(cov_func=cov)
# σ = pm.HalfCauchy("σ", beta=5)
# y_ = gp.marginal_likelihood("y", X=df_train_x, y=df_train_y, noise=σ)
# mp = pm.find_MAP()
with pm.Model() as model:
    # priors on the covariance function hyperparameters
    l = pm.Uniform('l', 0, 10)
    # uninformative prior on the function variance
    log_s2_f = pm.Uniform('log_s2_f', lower=-10, upper=10)
    s2_f = pm.Deterministic('s2_f', tt.exp(log_s2_f))
    # uninformative prior on the noise variance
    log_s2_n = pm.Uniform('log_s2_n', lower=-10, upper=10)
    s2_n = pm.Deterministic('s2_n', tt.exp(log_s2_n))
    # covariance function: exponential kernel (alternatives kept for reference)
    # cov = s2_f * pm.gp.cov.ExpQuad(1, l)
    cov = s2_f * pm.gp.cov.Exponential(1, l)
    # cov = s2_f * pm.gp.cov.Matern32(1, l)
    gp = pm.gp.Marginal(cov_func=cov)
    # note: pymc3 interprets `noise` as the noise standard deviation, so s2_n
    # acts as sigma here despite its variance-style name
    y_ = gp.marginal_likelihood("y", X=X, y=y, noise=s2_n)
    mp = pm.find_MAP()
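# inspect the fitted hyperparameters; pm.find_MAP returns a dict keyed by
# variable name, including the Deterministic nodes defined above
print({k: float(mp[k]) for k in ('l', 's2_f', 's2_n')})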
with model:
    # predict on a dense grid that extends beyond the observed x-range
    X_new = np.arange(-1, 7, 0.005)[:, None]
    f_pred = gp.conditional("f_pred", X_new)
    pred_samples = pm.sample_posterior_predictive([mp], var_names=['f_pred'], samples=2000)
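# pred_samples['f_pred'] is a (2000, len(X_new)) array: one row per posterior
# draw of the latent GP, evaluated at every point of X_new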
# load the MRA (belt sensor scale) data for comparison
mra = pd.read_csv("C:\\Users\\NIU004\\OneDrive - CSIRO\\Desktop\\Mineral sorting\\Kansanshi\\multiscale_mra1.csv")
x1 = np.array(mra['x'])
y1 = np.array(mra['y'])
# fig = plt.figure(figsize=(12, 5))
# plt.plot(X,y, "ok", ms=8, label="Bore core associated data")
# plt.scatter(x1,y1,color='blue', label="mra data")
# plt.xlabel("log10 tonnage")
# plt.ylabel("log10 variance")
# plt.title("Gaussian process")
# plt.legend()
# plot the results
fig = plt.figure(figsize=(12, 5))
ax = fig.gca()
# plot the samples from the GP posterior with density shading
plot_gp_dist(ax, pred_samples["f_pred"], X_new)
# plot the mean of the posterior predictive distribution
mean_pred = pred_samples["f_pred"].mean(axis=0)
plt.plot(X_new, mean_pred, color='black', lw=4, label='Mean Posterior GP')
# overlay the observed data
plt.plot(X, y, "ok", ms=8, label="Bore core associated data")
plt.scatter(x1, y1, color='blue', label="MRA data")
# axis labels and title
plt.xlabel("log10 tonnage scale")
plt.ylabel("log10 variance")
plt.title("Gaussian process")
plt.legend()
# marginal posterior at X_new[283] = -1 + 283*0.005 = 0.415 (the MRA belt sensor scale)
variable = pred_samples['f_pred'][:, 283]
# mean and standard deviation of the marginal GP posterior at that point
mean_pred_x_new = np.mean(variable)
std_pred_x_new = np.std(variable)
# cumulative probability up to the ground-truth value x_0 under a Gaussian fit
x_0 = -0.521
cdf_value = stats.norm.cdf(x_0, loc=mean_pred_x_new, scale=std_pred_x_new)
# probability of the prediction being higher than x_0
prob_higher_than_x0 = 1 - cdf_value
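# report the exceedance probability at the ground-truth threshold
print(f"P(prediction > {x_0}) = {prob_higher_than_x0:.3f}")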
# Create a range of values for plotting the Gaussian distribution
x_values = np.linspace(min(variable), max(variable), 1000)
gaussian_pdf = stats.norm.pdf(x_values, loc=mean_pred_x_new, scale=std_pred_x_new)
# Plotting the marginal distribution
plt.figure(figsize=(16, 12))
plt.hist(variable, bins=30, density=True, alpha=0.6, color='b')
plt.axvline(mean_pred_x_new, color='r', linestyle='--', label=f"Mean: {mean_pred_x_new:.3f}")
plt.axvline(mean_pred_x_new + std_pred_x_new, color='g', linestyle='--', label="Mean + 1 Std")
plt.axvline(mean_pred_x_new - std_pred_x_new, color='g', linestyle='--', label="Mean - 1 Std")
plt.plot(x_values, gaussian_pdf, 'r-', label='Gaussian fit')
plt.axvline(x_0, color='black', linestyle='--', label="Ground truth")
plt.tick_params(axis='both', which='major', labelsize=18)
plt.xlabel("Predicted Variance at MRA belt sensor scale",fontsize=24)
plt.ylabel("Density",fontsize=24)
# plt.title(f"Marginal Distribution at x=0.415",fontsize=24)
plt.legend(fontsize=20)
plt.show()
# squared error between a point prediction (-0.386) and the ground truth (-0.521)
print(mean_squared_error([-0.386], [-0.521]))