Skip to content

Commit

Permalink
added comments and fixed bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
Maha Alnassr committed Nov 13, 2023
1 parent e2d3706 commit 7117dc9
Showing 1 changed file with 32 additions and 50 deletions.
82 changes: 32 additions & 50 deletions career-model/JupyterNotebook.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -452,35 +452,32 @@
"source": [
"# Count of unique categories\n",
"gender_counts = df['Gender'].value_counts()\n",
"print(\"Gender Counts:\")\n",
"print(gender_counts)\n",
"major_counts = df['Major'].value_counts()\n",
"print(\"\\nMajor Counts:\")\n",
"print(major_counts)\n",
"extra_curricular_counts = df['Extra Curricular'].value_counts()\n",
"print(\"\\nExtra Curricular Counts:\")\n",
"print(extra_curricular_counts)\n",
"\n",
"# Unique categories\n",
"unique_genders = df['Gender'].unique()\n",
"print(\"Unique Genders:\")\n",
"print(unique_genders)\n",
"unique_majors = df['Major'].unique()\n",
"print(\"Unique Majors:\")\n",
"print(unique_majors)\n",
"unique_extra_curricular = df['Extra Curricular'].unique()\n",
"print(\"Unique Extra Curricular Activities:\")\n",
"print(unique_extra_curricular)\n",
"\n",
"# Mode\n",
"mode_gender = df['Gender'].mode().values[0]\n",
"mode_major = df['Major'].mode().values[0]\n",
"mode_extra_curricular = df['Extra Curricular'].mode().values[0]\n",
"\n",
"print(\"Gender Counts:\")\n",
"print(gender_counts)\n",
"print(\"Unique Genders:\")\n",
"print(unique_genders)\n",
"print(\"Mode Gender:\", mode_gender)\n",
"\n",
"print(\"\\nMajor Counts:\")\n",
"print(major_counts)\n",
"print(\"Unique Majors:\")\n",
"print(unique_majors)\n",
"mode_major = df['Major'].mode().values[0]\n",
"print(\"Mode Major:\", mode_major)\n",
"\n",
"print(\"\\nExtra Curricular Counts:\")\n",
"print(extra_curricular_counts)\n",
"print(\"Unique Extra Curricular Activities:\")\n",
"print(unique_extra_curricular)\n",
"mode_extra_curricular = df['Extra Curricular'].mode().values[0]\n",
"print(\"Mode Extra Curricular:\", mode_extra_curricular)\n"
]
},
Expand Down Expand Up @@ -577,7 +574,6 @@
}
],
"source": [
"\n",
"sns.countplot(data = df, y = 'Major', order = df['Major'].value_counts().index, hue = 'Major')\n",
"plt.title(\"Distribution of Major\")"
]
Expand Down Expand Up @@ -612,7 +608,6 @@
],
"source": [
"# Age\n",
"#df.groupby('Age').size().plot(kind='bar', title='Distribution of Age', ylabel='No. of Students')\n",
"sns.histplot(data=df, x=\"Age\", kde=True, bins = 10)"
]
},
Expand Down Expand Up @@ -646,7 +641,6 @@
],
"source": [
"# GPA\n",
"\n",
"sns.histplot(data=df, x=\"GPA\", kde=True).set(title='Distribution of GPA')"
]
},
Expand Down Expand Up @@ -682,7 +676,6 @@
"# Extra Curricular\n",
"color = ['black','red','green','orange','blue','limegreen','darkgreen','royalblue','navy','red','pink','orange']\n",
"\n",
"\n",
"sns.countplot(data = df, y = 'Extra Curricular', order = df['Extra Curricular'].value_counts().index, hue='Extra Curricular')\n",
"plt.title(\"Distribution of Extra-Curriculars\")"
]
Expand Down Expand Up @@ -1097,7 +1090,7 @@
"source": [
"from sklearn import metrics\n",
"confusion_matrix = metrics.confusion_matrix(y, y_pred)\n",
"print(\"The confusion matrix is\")\n",
"print(\"Confusion Matrix: \")\n",
"confusion_matrix"
]
},
Expand Down Expand Up @@ -1172,30 +1165,24 @@
}
],
"source": [
"from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, confusion_matrix\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report\n",
"\n",
"# Assumes the classifier `clf` has already been trained in an earlier cell\n",
"# Make predictions on X (NOTE(review): confirm X is the held-out test set)\n",
"y_pred = clf.predict(X)\n",
"\n",
"# Calculate accuracy\n",
"accuracy = accuracy_score(y_true, y_pred)\n",
"print(f\"Accuracy: {accuracy:.2f}\")\n",
"\n",
"# Calculate precision and recall, and F1-score\n",
"# Calculate precision and recall\n",
"precision = precision_score(y_true, y_pred)\n",
"recall = recall_score(y_true, y_pred)\n",
"print(f\"Precision: {precision:.2f}\")\n",
"print(f\"Recall: {recall:.2f}\")\n",
"\n",
"# Generate a classification report (includes precision, recall, and F1-score)\n",
"print(\"Classification Report:\")\n",
"print(classification_report(y_true, y_pred))\n",
"\n",
"# Create and display a confusion matrix\n",
"confusion = confusion_matrix(y_true, y_pred)\n",
"print(\"Confusion Matrix:\")\n",
"print(confusion)\n"
"print(\"Classification Report: \")\n",
"print(classification_report(y_true, y_pred))\n"
]
},
{
Expand Down Expand Up @@ -1408,6 +1395,8 @@
}
],
"source": [
"import json\n",
"\n",
"columns = ['Major', 'Age', 'Gender', 'Extra Curricular', 'Num Programming Languages', 'Num Past Internships']\n",
"distribution_counts = dict()\n",
"for index, row in df.iterrows():\n",
Expand All @@ -1419,7 +1408,6 @@
" distribution_counts[col][row[col]] = 0\n",
" distribution_counts[col][row[col]] += 1\n",
"\n",
"import json\n",
"print(json.dumps(distribution_counts, sort_keys=True, indent=4))"
]
},
Expand All @@ -1436,6 +1424,8 @@
"metadata": {},
"outputs": [],
"source": [
"# Credit to the Spring '23 team\n",
"\n",
"import pandas as pd\n",
"import joblib\n",
"from pydantic import BaseModel, Field\n",
Expand Down Expand Up @@ -1608,7 +1598,6 @@
"metadata": {},
"outputs": [],
"source": [
"# Keep track of outputs, predicted vs actual for accuracy metrics to be done later\n",
"predicted_women = []\n",
"actual_women = []\n",
"\n",
Expand Down Expand Up @@ -1814,6 +1803,7 @@
],
"source": [
"# True positive rates for men and women\n",
"\n",
"from sklearn.metrics import confusion_matrix, recall_score\n",
"\n",
"# Men\n",
Expand All @@ -1830,9 +1820,9 @@
"FN_women = conf_matrix_women[1, 0]\n",
"TPR_women = TP_women / (TP_women + FN_women)\n",
"\n",
"# Print the TPR for men\n",
"#######\n",
"\n",
"print(f\"True Positive Rate (Recall) for men: {TPR_men:.2f}\")\n",
"# Print the TPR for women\n",
"print(f\"True Positive Rate (Recall) for women: {TPR_women:.2f}\")"
]
},
Expand Down Expand Up @@ -1910,7 +1900,7 @@
"total_men = d[0] + d[1]\n",
"prediction_men = positive_men/total_men\n",
"# proportion of men predicted to be good candidates\n",
"print(f\"Good candidates for men (predicted): {prediction_men}\")\n",
"print(f\"Good candidates for men (predicted): {prediction_men}\")\n",
"\n",
"\n",
"numpy_actual_men = numpy.array(actual_men)\n",
Expand All @@ -1920,7 +1910,7 @@
"total_men1 = d[0] + d[1]\n",
"actual_men = positive_men1/total_men1\n",
"# proportion of men who are actually good candidates\n",
"print(f\"Good candidates for men (actual): {actual_men}\")\n",
"print(f\"Good candidates for men (actual): {actual_men}\")\n",
"\n",
"\n",
"numpy_predicted_women = numpy.array(predicted_women)\n",
Expand All @@ -1930,7 +1920,7 @@
"total_women = d[0] + d[1]\n",
"prediction_women = positive_women/total_women\n",
"# proportion of women predicted to be good candidates\n",
"print(f\"Good candidates for women (predicted): {prediction_women}\")\n",
"print(f\"Good candidates for women (predicted): {prediction_women}\")\n",
"\n",
"\n",
"numpy_actual_women = numpy.array(actual_women)\n",
Expand All @@ -1940,7 +1930,7 @@
"total_women1 = d[0] + d[1]\n",
"actual_women = positive_women1/total_women1\n",
"# proportion of women who are actually good candidates\n",
"print(f\"Good candidates for women (actual): {actual_women}\")"
"print(f\"Good candidates for women (actual): {actual_women}\")"
]
},
{
Expand Down Expand Up @@ -1998,7 +1988,6 @@
"TNR_actual_men = 1.0 - TPR_actual_men\n",
"TNR_actual_women = 1.0 - TPR_actual_women\n",
"\n",
"\n",
"print(\"Disparate Impact Predicted:\", DI_predicted)\n",
"print(\"Disparate Impact Actual:\", DI_actual)\n",
"\n",
Expand All @@ -2011,18 +2000,11 @@
"print(\"Equalized Odds Difference Predicted:\", EOD_positive_predicted)\n",
"print(\"Equalized Odds Difference Actual:\", EOD_positive_actual)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
Expand All @@ -2036,7 +2018,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.13"
"version": "3.8.8"
}
},
"nbformat": 4,
Expand Down

0 comments on commit 7117dc9

Please sign in to comment.