Commit

algo-rhythmic
trevorhuang1 committed Apr 3, 2024
1 parent 534f0fa commit cf5c8ef
Showing 8 changed files with 1,732 additions and 10 deletions.
43 changes: 36 additions & 7 deletions _notebooks/2024-03-05-DS-python-pandas-df_titanic.ipynb
@@ -69,14 +69,43 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: seaborn in /home/trevor/.local/lib/python3.10/site-packages (0.13.2)\n",
"Requirement already satisfied: matplotlib!=3.6.1,>=3.4 in /usr/lib/python3/dist-packages (from seaborn) (3.5.1)\n",
"Requirement already satisfied: numpy!=1.24.0,>=1.20 in /home/trevor/.local/lib/python3.10/site-packages (from seaborn) (1.26.4)\n",
"Requirement already satisfied: pandas>=1.2 in /home/trevor/.local/lib/python3.10/site-packages (from seaborn) (2.2.1)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas>=1.2->seaborn) (2022.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/trevor/.local/lib/python3.10/site-packages (from pandas>=1.2->seaborn) (2024.1)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/trevor/.local/lib/python3.10/site-packages (from pandas>=1.2->seaborn) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas>=1.2->seaborn) (1.16.0)\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: pandas in /home/trevor/.local/lib/python3.10/site-packages (2.2.1)\n",
"Requirement already satisfied: tzdata>=2022.7 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (2024.1)\n",
"Requirement already satisfied: pytz>=2020.1 in /usr/lib/python3/dist-packages (from pandas) (2022.1)\n",
"Requirement already satisfied: numpy<2,>=1.22.4 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (1.26.4)\n",
"Requirement already satisfied: python-dateutil>=2.8.2 in /home/trevor/.local/lib/python3.10/site-packages (from pandas) (2.9.0.post0)\n",
"Requirement already satisfied: six>=1.5 in /usr/lib/python3/dist-packages (from python-dateutil>=2.8.2->pandas) (1.16.0)\n",
"Defaulting to user installation because normal site-packages is not writeable\n",
"Requirement already satisfied: scikit-learn in /home/trevor/.local/lib/python3.10/site-packages (1.4.1.post1)\n",
"Requirement already satisfied: scipy>=1.6.0 in /usr/lib/python3/dist-packages (from scikit-learn) (1.8.0)\n",
"Requirement already satisfied: threadpoolctl>=2.0.0 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (3.3.0)\n",
"Requirement already satisfied: numpy<2.0,>=1.19.5 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (1.26.4)\n",
"Requirement already satisfied: joblib>=1.2.0 in /home/trevor/.local/lib/python3.10/site-packages (from scikit-learn) (1.3.2)\n"
]
}
],
"source": [
"# Uncomment the following lines to install the required packages\n",
"# !pip install seaborn\n",
"# !pip install pandas\n",
"# !pip install scikit-learn"
"!pip install seaborn\n",
"!pip install pandas\n",
"!pip install scikit-learn"
]
},
{
@@ -90,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [
{
@@ -358,7 +387,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [
{
80 changes: 80 additions & 0 deletions _notebooks/2024-03-12-CSP-MCQ-2020-missed.ipynb
@@ -985,6 +985,86 @@
"animal = animal + \"antelope\"[4:8]\n",
"print(animal)\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Element 12 is found at index 5\n"
]
}
],
"source": [
"def binary_search(arr, target):\n",
" low = 0\n",
" high = len(arr) - 1\n",
"\n",
" while low <= high:\n",
" mid = (low + high) // 2\n",
" if arr[mid] == target:\n",
" return mid\n",
" elif arr[mid] < target:\n",
" low = mid + 1\n",
" else:\n",
" high = mid - 1\n",
"\n",
" return -1 # Target not found\n",
"\n",
"# Example usage\n",
"arr = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]\n",
"target = 12\n",
"result = binary_search(arr, target)\n",
"if result != -1:\n",
" print(f\"Element {target} is found at index {result}\")\n",
"else:\n",
" print(f\"Element {target} is not found in the array\")\n"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'neg': 0.0, 'neu': 1.0, 'pos': 0.0, 'compound': 0.0}\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"[nltk_data] Downloading package vader_lexicon to\n",
"[nltk_data] /home/trevor/nltk_data...\n",
"[nltk_data] Package vader_lexicon is already up-to-date!\n"
]
}
],
"source": [
"import nltk\n",
"from nltk.sentiment import SentimentIntensityAnalyzer\n",
"\n",
"# Download the vader_lexicon\n",
"nltk.download('vader_lexicon')\n",
"\n",
"# Initialize the VADER sentiment intensity analyzer\n",
"sia = SentimentIntensityAnalyzer()\n",
"\n",
"# Provide a string to analyze\n",
"text = \"\"\n",
"\n",
"# Get the sentiment scores\n",
"sentiment = sia.polarity_scores(text)\n",
"\n",
"print(sentiment)"
]
}
],
"metadata": {
153 changes: 153 additions & 0 deletions _notebooks/2024-03-25-inflation.ipynb
@@ -0,0 +1,153 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.tree import DecisionTreeClassifier, export_text\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.metrics import accuracy_score\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import numpy as np\n",
"import pdb\n",
"import ast\n",
"\n",
"# Define the TitanicRegression global variable\n",
"inflation_regression = None\n",
"\n",
"# Define the TitanicRegression class\n",
"class InflationRegression:\n",
" def __init__(self):\n",
" self.dt = None\n",
" self.logreg = None\n",
" self.X_train = None\n",
" self.X_test = None\n",
" self.y_train = None\n",
" self.y_test = None\n",
" self.encoder = None\n",
" self.initInflation() # Call initTitanic in the constructor\n",
"\n",
"\n",
" def initInflation(self):\n",
" cpi_data = pd.read_csv('files/us_cpi.csv')\n",
" self.td = cpi_data\n",
" self.td.dropna(inplace=True) # drop rows with at least one missing value, after dropping unuseful columns\n",
"\n",
" # Encode categorical variables\n",
" self.encoder = OneHotEncoder(handle_unknown='ignore')\n",
" self.encoder.fit(self.td[['embarked']])\n",
" self.onehot = self.encoder.transform(self.td[['embarked']]).toarray()\n",
" cols = ['embarked_' + val for val in self.encoder.categories_[0]]\n",
" self.td[cols] = pd.DataFrame(self.onehot)\n",
" self.td.dropna(inplace=True)\n",
" print(self.td)\n",
" # clean data\n",
" \n",
"\n",
"\n",
"\n",
" def runDecisionTree(self):\n",
" X = self.td.drop('survived', axis=1) # all except 'survived'\n",
" y = self.td['survived'] # only 'survived'\n",
" self.X_train, X_test, self.y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" dt = DecisionTreeClassifier()\n",
" dt.fit(self.X_train, self.y_train)\n",
" self.dt = dt\n",
" print(export_text(dt, feature_names=X.columns.tolist()))\n",
"\n",
" # more code here\n",
"\n",
" def runLogisticRegression(self, X, y):\n",
" # more code here\n",
" self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" self.logreg = LogisticRegression()\n",
" self.logreg.fit(self.X_train, self.y_train)\n",
" \n",
" def predictSurvival(self, passenger):\n",
" X = self.td.drop('survived', axis=1) # all except 'survived'\n",
" y = self.td['survived'] # only 'survived'\n",
" self.X_train, X_test, self.y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)\n",
" \n",
" self.logreg = LogisticRegression()\n",
" self.logreg.fit(self.X_train, self.y_train)\n",
" \n",
" passenger = list(passenger.values())\n",
" \n",
" passenger = pd.DataFrame({\n",
" 'name': [passenger[0]],\n",
" 'pclass': [passenger[1]],\n",
" 'sex': [passenger[2]],\n",
" 'age': [passenger[3]],\n",
" 'sibsp': [passenger[4]],\n",
" 'parch': [passenger[5]],\n",
" 'fare': [passenger[6]],\n",
" 'embarked': [passenger[7]],\n",
" 'alone': [passenger[8]]\n",
" })\n",
" \n",
" passenger['sex'] = passenger['sex'].apply(lambda x: 1 if x == 'male' else 0)\n",
" passenger['alone'] = passenger['alone'].apply(lambda x: 1 if x == True else 0)\n",
" onehot = self.encoder.transform(passenger[['embarked']])\n",
" cols = ['embarked_' + val for val in self.encoder.categories_[0]]\n",
" print(passenger)\n",
" passenger[cols] = pd.DataFrame(onehot.toarray(), index=passenger.index)\n",
" passenger.drop(['name'], axis=1, inplace=True)\n",
" passenger.drop(['embarked'], axis=1, inplace=True)\n",
" \n",
" print(passenger)\n",
" # passenger_list = list(passenger[\"passenger\"].values())\n",
"\n",
" # passenger = np.asarray(passenger_list).reshape(1, -1)\n",
" # #preprocessing\n",
" \n",
" aliveProb = np.squeeze(self.logreg.predict_proba(passenger))\n",
" print(aliveProb)\n",
" aliveProb.tolist()\n",
" deathProb = aliveProb[0]\n",
" aliveProb = aliveProb[1]\n",
" \n",
" return 'Survival probability: {:.2%}'.format(aliveProb),('Death probability: {:.2%}'.format(deathProb)) \n",
"\n",
"\n",
"def initInflation():\n",
" global titanic_regression\n",
" titanic_regression = TitanicRegression()\n",
" titanic_regression.initTitanic()\n",
" X = titanic_regression.td.drop('survived', axis=1)\n",
" y = titanic_regression.td['survived']\n",
" titanic_regression.runLogisticRegression(X, y) #s # Pass X and y to runLogisticRegression\n",
"\n",
"\n",
"# From API\n",
"\n",
"# Sample usage without API"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
}