Commit

algo-rhythmic
trevorhuang1 committed Apr 3, 2024
1 parent 534f0fa commit cf5c8ef
Showing 8 changed files with 1,732 additions and 10 deletions.
43 changes: 36 additions & 7 deletions _notebooks/2024-03-05-DS-python-pandas-df_titanic.ipynb
@@ -69,14 +69,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: seaborn in /home/trevor/.local/lib/python3.10/site-packages (0.13.2)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /usr/lib/python3/dist-packages (from seaborn) (3.5.1)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in /home/trevor/.local/lib/python3.10/site-packages (from seaborn) (1.26.4)\n",
"Requirement already satisfied: pandas>=1.2 in /home/trevor/.local/lib/python3.10/site-packages (from seaborn) (2.2.1)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas>=1.2->seaborn) (2022.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/trevor/.local/lib/python3.10/site-packages (from pandas>=1.2->seaborn) (2024.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/trevor/.local/lib/python3.10/site-packages (from pandas>=1.2->seaborn) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas>=1.2->seaborn) (1.16.0)\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: pandas in /home/trevor/.local/lib/python3.10/site-packages (2.2.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas) (2022.1)\n",
"Requirement already satisfied: numpy<2,>=1.22.4 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: scikit-learn in /home/trevor/.local/lib/python3.10/site-packages (1.4.1.post1)\n",
"Requirement already satisfied: scipy>=1.6.0 in /usr/lib/python3/dist-packages (from scikit-learn) (1.8.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (3.3.0)\n",
"Requirement already satisfied: numpy<2.0,>=1.19.5 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (1.26.4)\n",
"Requirement already satisfied: joblib>=1.2.0 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (1.3.2)\n"
]
}
],
"source": [
"# Uncomment the following lines to install the required packages\n",
"# !pip install seaborn\n",
"# !pip install pandas\n",
"# !pip install scikit-learn"
"!pip install seaborn\n",
"!pip install pandas\n",
"!pip install scikit-learn"
]
},
{
@@ -90,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -358,7 +387,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
80 changes: 80 additions & 0 deletions _notebooks/2024-03-12-CSP-MCQ-2020-missed.ipynb
@@ -985,6 +985,86 @@
"animal = animal + \"antelope\"[4:8]\n",
"print(animal)\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Element 12 is found at index 5\n"
]
}
],
"source": [
"def binary_search(arr, target):\n",
" low = 0\n",
" high = len(arr) - 1\n",
"\n",
" while low <= high:\n",
" mid = (low + high) // 2\n",
" if arr[mid] == target:\n",
" return mid\n",
" elif arr[mid] < target:\n",
" low = mid + 1\n",
" else:\n",
" high = mid - 1\n",
"\n",
" return -1 # Target not found\n",
"\n",
"# Example usage\n",
"arr = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]\n",
"target = 12\n",
"result = binary_search(arr, target)\n",
"if result != -1:\n",
" print(f\"Element {target} is found at index {result}\")\n",
"else:\n",
" print(f\"Element {target} is not found in the array\")\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package vader_lexicon to\n",
"[nltk_data] /home/trevor/nltk_data...\n",
"[nltk_data] Package vader_lexicon is already up-to-date!\n"
]
}
],
"source": [
"import nltk\n",
"from nltk.sentiment import SentimentIntensityAnalyzer\n",
"\n",
"# Download the vader_lexicon\n",
"nltk.download('vader_lexicon')\n",
"\n",
"# Initialize the VADER sentiment intensity analyzer\n",
"sia = SentimentIntensityAnalyzer()\n",
"\n",
"# Provide a string to analyze\n",
"text = \"\"\n",
"\n",
"# Get the sentiment scores\n",
"sentiment = sia.polarity_scores(text)\n",
"\n",
"print(sentiment)"
]
}
],
"metadata": {
153 changes: 153 additions & 0 deletions _notebooks/2024-03-25-inflation.ipynb
@@ -0,0 +1,153 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier, export_text\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import numpy as np\n",
"import pdb\n",
"import ast\n",
"\n",
"# Define the TitanicRegression global variable\n",
"inflation_regression = None\n",
"\n",
"# Define the TitanicRegression class\n",
"class InflationRegression:\n",
" def __init__(self):\n",
" self.dt = None\n",
" self.logreg = None\n",
" self.X_train = None\n",
" self.X_test = None\n",
" self.y_train = None\n",
" self.y_test = None\n",
" self.encoder = None\n",
" self.initInflation() # Call initTitanic in the constructor\n",
"\n",
"\n",
" def initInflation(self):\n",
" cpi_data = pd.read_csv('files/us_cpi.csv')\n",
" self.td = cpi_data\n",
" self.td.dropna(inplace=True) # drop rows with at least one missing value, after dropping unuseful columns\n",
"\n",
" # Encode categorical variables\n",
" self.encoder = OneHotEncoder(handle_unknown='ignore')\n",
" self.encoder.fit(self.td[['embarked']])\n",
" self.onehot = self.encoder.transform(self.td[['embarked']]).toarray()\n",
" cols = ['embarked_' + val for val in self.encoder.categories_[0]]\n",
" self.td[cols] = pd.DataFrame(self.onehot)\n",
" self.td.dropna(inplace=True)\n",
" print(self.td)\n",
" # clean data\n",
" \n",
"\n",
"\n",
"\n",
" def runDecisionTree(self):\n",
" X = self.td.drop('survived', axis=1) # all except 'survived'\n",
" y = self.td['survived'] # only 'survived'\n",
" self.X_train, X_test, self.y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" dt = DecisionTreeClassifier()\n",
" dt.fit(self.X_train, self.y_train)\n",
" self.dt = dt\n",
" print(export_text(dt, feature_names=X.columns.tolist()))\n",
"\n",
" # more code here\n",
"\n",
" def runLogisticRegression(self, X, y):\n",
" # more code here\n",
" self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" self.logreg = LogisticRegression()\n",
" self.logreg.fit(self.X_train, self.y_train)\n",
" \n",
" def predictSurvival(self, passenger):\n",
" X = self.td.drop('survived', axis=1) # all except 'survived'\n",
" y = self.td['survived'] # only 'survived'\n",
" self.X_train, X_test, self.y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" \n",
" self.logreg = LogisticRegression()\n",
" self.logreg.fit(self.X_train, self.y_train)\n",
" \n",
" passenger = list(passenger.values())\n",
" \n",
" passenger = pd.DataFrame({\n",
" 'name': [passenger[0]],\n",
" 'pclass': [passenger[1]],\n",
" 'sex': [passenger[2]],\n",
" 'age': [passenger[3]],\n",
" 'sibsp': [passenger[4]],\n",
" 'parch': [passenger[5]],\n",
" 'fare': [passenger[6]],\n",
" 'embarked': [passenger[7]],\n",
" 'alone': [passenger[8]]\n",
" })\n",
" \n",
" passenger['sex'] = passenger['sex'].apply(lambda x: 1 if x == 'male' else 0)\n",
" passenger['alone'] = passenger['alone'].apply(lambda x: 1 if x == True else 0)\n",
" onehot = self.encoder.transform(passenger[['embarked']])\n",
" cols = ['embarked_' + val for val in self.encoder.categories_[0]]\n",
" print(passenger)\n",
" passenger[cols] = pd.DataFrame(onehot.toarray(), index=passenger.index)\n",
" passenger.drop(['name'], axis=1, inplace=True)\n",
" passenger.drop(['embarked'], axis=1, inplace=True)\n",
" \n",
" print(passenger)\n",
" # passenger_list = list(passenger[\"passenger\"].values())\n",
"\n",
" # passenger = np.asarray(passenger_list).reshape(1, -1)\n",
" # #preprocessing\n",
" \n",
" aliveProb = np.squeeze(self.logreg.predict_proba(passenger))\n",
" print(aliveProb)\n",
" aliveProb.tolist()\n",
" deathProb = aliveProb[0]\n",
" aliveProb = aliveProb[1]\n",
" \n",
" return 'Survival probability: {:.2%}'.format(aliveProb),('Death probability: {:.2%}'.format(deathProb)) \n",
"\n",
"\n",
"def initInflation():\n",
" global titanic_regression\n",
" titanic_regression = TitanicRegression()\n",
" titanic_regression.initTitanic()\n",
" X = titanic_regression.td.drop('survived', axis=1)\n",
" y = titanic_regression.td['survived']\n",
" titanic_regression.runLogisticRegression(X, y) #s # Pass X and y to runLogisticRegression\n",
"\n",
"\n",
"# From API\n",
"\n",
"# Sample usage without API"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}