From 098ce8c164fc0a9646a89701a9075194f74bfe43 Mon Sep 17 00:00:00 2001
From: Ayush Joshi <ayush854032@gmail.com>
Date: Thu, 7 Dec 2023 13:48:07 +0530
Subject: [PATCH] Added example comparing the `sklearn` and `ai` APIs'
 `LinearRegression` estimators over a single feature of the `diabetes` dataset

Signed-off-by: Ayush Joshi <ayush854032@gmail.com>
---
 examples/linear_model/linear.py | 105 ++++++++++++++++++++++++++++++++
 1 file changed, 105 insertions(+)
 create mode 100644 examples/linear_model/linear.py

diff --git a/examples/linear_model/linear.py b/examples/linear_model/linear.py
new file mode 100644
index 0000000..dfe18a6
--- /dev/null
+++ b/examples/linear_model/linear.py
@@ -0,0 +1,105 @@
+# Copyright 2023 The AI Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# pylint: disable=too-many-function-args, invalid-name, missing-module-docstring
+# pylint: disable=missing-class-docstring
+
+import numpy as np
+import matplotlib.pyplot as plt
+
+from sklearn import datasets, linear_model
+from sklearn.metrics import mean_squared_error, r2_score
+
+from ai import LinearRegression
+
+# Load the diabetes dataset
+diabetes_X, diabetes_y = datasets.load_diabetes(return_X_y=True)
+
+# Use only one feature
+diabetes_X = diabetes_X[:, np.newaxis, 2]
+
+# Split the data into training/testing sets
+diabetes_X_train = diabetes_X[:-20]
+diabetes_X_test = diabetes_X[-20:]
+
+# Split the targets into training/testing sets
+diabetes_y_train = diabetes_y[:-20]
+diabetes_y_test = diabetes_y[-20:]
+
+# Create linear regression object using sklearn api
+model = linear_model.LinearRegression()
+
+# Train the model using the training set
+model.fit(diabetes_X_train, diabetes_y_train)
+
+# Make predictions using the testing set
+diabetes_y_pred = model.predict(diabetes_X_test)
+
+print("Calculating using sklearn api...")
+
+# The coefficients
+print("(sklearn) Coefficients: \n", model.coef_)
+# The mean squared error
+print(
+  "(sklearn) Mean squared error: %.2f" %
+  mean_squared_error(diabetes_y_test, diabetes_y_pred)
+)
+# The coefficient of determination: 1 is perfect prediction
+print(
+  "(sklearn) Coefficient of determination: %.2f" %
+  r2_score(diabetes_y_test, diabetes_y_pred)
+)
+
+# Plot outputs
+plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
+plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
+
+plt.xticks(())
+plt.yticks(())
+
+plt.show()
+
+print("Calculating using ai api...")
+
+# Create linear regression object using ai api
+# `ai` does not implement the closed-form solution for linear regression,
+# so the `sklearn` api beats us in terms of speed
+model = LinearRegression(n_iters=1_000_000)
+
+# Train the model using the training set
+model.fit(diabetes_X_train, diabetes_y_train)
+
+# Make predictions using the testing set
+diabetes_y_pred = model.predict(diabetes_X_test)
+
+# The coefficients (NOTE: read from the underscore-prefixed `_weights` attribute)
+print("(ai) Coefficients: \n", model._weights)
+# The mean squared error
+print(
+  "(ai) Mean squared error: %.2f" %
+  mean_squared_error(diabetes_y_test, diabetes_y_pred)
+)
+# The coefficient of determination: 1 is perfect prediction
+print(
+  "(ai) Coefficient of determination: %.2f" %
+  r2_score(diabetes_y_test, diabetes_y_pred)
+)
+
+# Plot outputs
+plt.scatter(diabetes_X_test, diabetes_y_test, color="black")
+plt.plot(diabetes_X_test, diabetes_y_pred, color="blue", linewidth=3)
+
+plt.xticks(())
+plt.yticks(())
+
+plt.show()