
Commit

Add FigShare datasets, DOS in QE, Uncertainty.
knc6 committed Aug 11, 2023
1 parent 30eae92 commit 4477c4a
Showing 6 changed files with 338 additions and 18 deletions.
2 changes: 1 addition & 1 deletion jarvis/__init__.py
@@ -1,5 +1,5 @@
"""Version number."""
__version__ = "2023.05.26"
__version__ = "2023.08.01"

import os

193 changes: 193 additions & 0 deletions jarvis/ai/uncertainty/gaussian_process_uncertainty.py
@@ -0,0 +1,193 @@
"""
Code to predict properties and their uncertainty.
ML model used: lgbm
"""
# Ref: https://doi.org/10.1021/acsomega.1c03752
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from jarvis.ai.pkgs.utils import regr_scores
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF
from sklearn.gaussian_process.kernels import WhiteKernel
from sklearn.gaussian_process.kernels import RationalQuadratic

# from sklearn.gaussian_process.kernels import ExpSineSquared
# from sklearn.gaussian_process.kernels import ConstantKernel as C
# import joblib
from joblib import dump
from collections import OrderedDict


def GaussianProcesses(
x,
y,
jid,
cv=2,
n_jobs=-1,
n_iter=10,
random_state=508842607,
scoring="neg_mean_absolute_error",
prop="exfoliation_energy",
write_files=True,
):
"""
Perform Gaussian Processes and determine prediction intervals.
NOTE: it can deal with ~50 descriptors AT MAX ==> reduction of
the CFID descriptors is needed
"""
    # TODO: write the output file in a cleaner, structured format

    # STEP-2: Split the data
    # ***************************
    # 90%/10% train/test split

X_train, X_test, y_train, y_test, jid_train, jid_test = train_test_split(
x, y, jid, random_state=1, test_size=0.1
)
# print ('lenx len y',len(x[0]),len(y))

    # STEP-3: Use a specific ML model: GP in this case
# *************************************************
#
# Model definition
# ================
    # The kernel can be changed depending on the property under examination.
    # In this example we use a composite kernel that includes long-range
    # effects (RBF), medium-term irregularities (RationalQuadratic), and
    # white noise.
    # Periodic trends can be added using ExpSineSquared.
    # Note: RBF has one length-scale parameter per descriptor, so the
    # number of parameters must be changed if a different number of
    # descriptors is used.
RQ1 = 316**2 * RationalQuadratic(
alpha=0.067,
length_scale=43.5,
length_scale_bounds=(1e0, 1e2),
alpha_bounds=(1e-3, 1e1),
)
RQ2 = 316**2 * RationalQuadratic(
alpha=0.00473,
length_scale=0.0796,
length_scale_bounds=(1e-2, 1e0),
alpha_bounds=(1e-3, 1e1),
)
RBF1 = 316**2 * RBF(
length_scale=[
16,
1.48e03,
9.92e04,
0.858,
1.02,
2.58,
2.72e03,
2.22e03,
0.839,
0.5,
6.15,
130,
3.9e03,
3.1,
0.675,
6.04,
1.18e03,
2.9,
4.56e03,
333,
0.0167,
797,
2.37e03,
1.81e03,
]
)
White = WhiteKernel(noise_level=0.7, noise_level_bounds=(0.05, 0.8))
kernel = RQ1 + RQ2 + RBF1 + White
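    # Note: the 316**2 prefactors act as amplitude factors; scikit-learn
    # wraps "scalar * kernel" as a Product with a ConstantKernel, so the
    # amplitudes are optimized along with the other hyperparameters.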

mid_model = GaussianProcessRegressor(
kernel=kernel,
alpha=0.0001,
n_restarts_optimizer=3,
normalize_y=True,
        random_state=774057201,
)

# Model fitting
# =============
    # Standardize features; transform() returns a new array, so the
    # results must be assigned back.
    scaler = StandardScaler().fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

mid_model.fit(X_train, y_train)

dump(mid_model, "model.joblib")
print("")
print("Params:")
print(mid_model.get_params(deep=True))
print("Initial: ", kernel)
print("Optimum: ", mid_model.kernel_)
print(
"Log-Marginal-Likelihood: ",
mid_model.log_marginal_likelihood(mid_model.kernel_.theta),
)
print("")
print("")

# PREDICTIONS and UQ
# ==================
pred, sigma = mid_model.predict(X_test, return_std=True)
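    # sigma is the GP posterior predictive standard deviation; the
    # pred +/- sigma bounds written below form a ~68% credible interval
    # under the Gaussian assumption.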
print("Model mae rmse")
reg_sc = regr_scores(y_test, pred)
info = OrderedDict()
info["MAE_Mid"] = reg_sc["mae"]
actual = y_test
print("Mid:", round(reg_sc["mae"], 3), round(reg_sc["rmse"], 3))

    fout2 = open("Intervals1.dat", "w")
    line0 = "# Id Jid Observed pred_Lower pred_Mid "
    line1 = line0 + "RealErr(Mid) "
    line = line1 + "pred_Upper PredError Pred_inBounds\n"
    fout2.write(line)
    sigma_sum = 0.0  # running total of predicted sigmas (avoids shadowing built-in sum)
    count = 0
    MAE_err = 0.0
for ii in range(len(y_test)):
ss = float(sigma[ii])
pp = float(pred[ii])
        sigma_sum = sigma_sum + ss
lower = pp - ss
upper = pp + ss
err = ss
real_err = float(abs(y_test[ii] - pp))
err_err = abs(real_err - err)
MAE_err = MAE_err + err_err
        # Check whether the observed value falls inside the predicted
        # interval.
        if lower <= float(y_test[ii]) <= upper:
            in_bounds = "True"
            count = count + 1
        else:
            in_bounds = "False"
        line2_0 = str(ii) + " " + jid_test[ii] + " " + str(y_test[ii]) + " "
line2_1 = line2_0 + str(lower) + " " + str(pp) + " " + str(real_err)
line2 = (
line2_1
+ " "
+ str(upper)
+ " "
+ str(ss)
+ " "
+ str(in_bounds)
+ "\n"
)
        fout2.write(line2)
    fout2.close()
print("")
print("Number of test materials= " + str(len(actual)))
print(
"Percentage of in-bound results= "
+ str((float(count) / (len(actual))) * 100)
+ "%"
)
print(" ")
MAE_error = float(MAE_err) / (len(actual))
print("MAE predicted error = " + str(MAE_error))
info["MAE_Error"] = MAE_error
return info
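
A minimal usage sketch (illustrative assumptions: x is a feature matrix already reduced to the 24 descriptors the RBF1 kernel expects, y is the target property, jid holds JARVIS identifiers, and the toy data below is hypothetical):

    import numpy as np
    from jarvis.ai.uncertainty.gaussian_process_uncertainty import GaussianProcesses

    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 24))  # 100 materials x 24 reduced CFID descriptors
    y = rng.normal(size=100)        # target property, e.g. exfoliation energy
    jid = ["JVASP-%d" % i for i in range(100)]

    info = GaussianProcesses(x, y, jid, prop="exfoliation_energy")
    print(info["MAE_Mid"], info["MAE_Error"])

The call writes model.joblib and Intervals1.dat to the working directory and returns the test MAE along with the mean absolute error of the predicted uncertainties.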
2 changes: 1 addition & 1 deletion jarvis/ai/uncertainty/lgbm_quantile_uncertainty.py
@@ -3,7 +3,7 @@
ML model used: lgbm
"""

# Ref: https://doi.org/10.1021/acsomega.1c03752
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle
37 changes: 36 additions & 1 deletion jarvis/db/figshare.py
@@ -357,6 +357,41 @@ def get_db_info():
"Obtaining TinNet OH dataset 748...",
"https://github.com/hlxin/tinnet",
],
# https://doi.org/10.6084/m9.figshare.23909478
"AGRA_O": [
"https://figshare.com/ndownloader/files/41923284",
"AGRA_O.json",
"Obtaining AGRA Oxygen dataset 1000...",
"https://github.com/Feugmo-Group/AGRA",
],
# https://doi.org/10.6084/m9.figshare.23909478
"AGRA_OH": [
"https://figshare.com/ndownloader/files/41923287",
"AGRA_OH.json",
"Obtaining AGRA OH dataset 875...",
"https://github.com/Feugmo-Group/AGRA",
],
# https://doi.org/10.6084/m9.figshare.23909478
"AGRA_CO": [
"https://figshare.com/ndownloader/files/41923278",
"AGRA_CO.json",
"Obtaining AGRA CO dataset 193...",
"https://github.com/Feugmo-Group/AGRA",
],
# https://doi.org/10.6084/m9.figshare.23909478
"AGRA_CHO": [
"https://figshare.com/ndownloader/files/41923275",
"AGRA_CHO.json",
"Obtaining AGRA Oxygen dataset 214...",
"https://github.com/Feugmo-Group/AGRA",
],
# https://doi.org/10.6084/m9.figshare.23909478
"AGRA_COOH": [
"https://figshare.com/ndownloader/files/41923281",
"AGRA_COOH.json",
"Obtaining AGRA COOH dataset 280...",
"https://github.com/Feugmo-Group/AGRA",
],
# https://doi.org/10.6084/m9.figshare.21370572
"supercon_3d": [
"https://figshare.com/ndownloader/files/38307921",
@@ -610,7 +645,7 @@ def get_hk_tb(k=np.array([0, 0, 0]), w=[]):
hk = np.zeros((w.nwan, w.nwan), dtype=complex)
kmat = np.tile(k, (nr, 1))
exp_ikr = np.exp(1.0j * 2 * np.pi * np.sum(kmat * w.R, 1))
-    temp = np.zeros(w.nwan ** 2, dtype=complex)
+    temp = np.zeros(w.nwan**2, dtype=complex)
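    # Fourier transform of the real-space Wannier Hamiltonian:
    # H(k) = sum_R exp(i 2*pi k . R) H(R), accumulated block-by-block.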
for i in range(nr):
temp += exp_ikr[i] * w.HR[i, :]
hk = np.reshape(temp, (w.nwan, w.nwan))
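
The new entries are fetched like any other FigShare dataset through jarvis.db.figshare.data (a short sketch using the AGRA_O key added above):

    from jarvis.db.figshare import data

    # Downloads AGRA_O.json from FigShare on first use, then loads it.
    agra_o = data("AGRA_O")
    print(len(agra_o))  # 1000 entries, per the dataset description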
