diff --git a/P4/Jupyter_Notebook_Setup_P4.ipynb b/P4/Jupyter_Notebook_Setup_P4.ipynb index 091e9587..8e7e7648 100644 --- a/P4/Jupyter_Notebook_Setup_P4.ipynb +++ b/P4/Jupyter_Notebook_Setup_P4.ipynb @@ -354,7 +354,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "53ef1752", + "id": "38d1d859", "metadata": {}, "outputs": [ { @@ -401,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 165, + "execution_count": 176, "id": "d27a24a9", "metadata": {}, "outputs": [ @@ -412,7 +412,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [165]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Plotting the distribution of the test dataset (Gender)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m plt\u001b[38;5;241m.\u001b[39mylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistribution\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxticks(rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m90\u001b[39m) \n", + "Input \u001b[0;32mIn [176]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Plotting the distribution of the test dataset (Gender)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m plt\u001b[38;5;241m.\u001b[39mylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistribution\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxticks(rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m90\u001b[39m) \n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/_decorators.py:46\u001b[0m, in \u001b[0;36m_deprecate_positional_args..inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 37\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPass the following variable\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m as \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124mkeyword arg\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFrom version 0.12, the only valid positional argument \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m\n\u001b[1;32m 44\u001b[0m )\n\u001b[1;32m 45\u001b[0m kwargs\u001b[38;5;241m.\u001b[39mupdate({k: arg \u001b[38;5;28;01mfor\u001b[39;00m k, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)})\n\u001b[0;32m---> 46\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:3598\u001b[0m, in \u001b[0;36mcountplot\u001b[0;34m(x, y, hue, data, order, hue_order, orient, color, palette, saturation, dodge, ax, **kwargs)\u001b[0m\n\u001b[1;32m 3595\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m x \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3596\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot pass values for both `x` and `y`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 3598\u001b[0m plotter \u001b[38;5;241m=\u001b[39m \u001b[43m_CountPlotter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3599\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3600\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mci\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_boot\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3601\u001b[0m \u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpalette\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msaturation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3602\u001b[0m \u001b[43m \u001b[49m\u001b[43merrcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrwidth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcapsize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdodge\u001b[49m\n\u001b[1;32m 3603\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3605\u001b[0m plotter\u001b[38;5;241m.\u001b[39mvalue_label \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3607\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ax \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:1584\u001b[0m, in \u001b[0;36m_BarPlotter.__init__\u001b[0;34m(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, x, y, hue, data, order, hue_order,\n\u001b[1;32m 1580\u001b[0m estimator, ci, n_boot, units, seed,\n\u001b[1;32m 1581\u001b[0m orient, color, palette, saturation, errcolor,\n\u001b[1;32m 1582\u001b[0m errwidth, capsize, dodge):\n\u001b[1;32m 1583\u001b[0m \u001b[38;5;124;03m\"\"\"Initialize the plotter.\"\"\"\u001b[39;00m\n\u001b[0;32m-> 1584\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mestablish_variables\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1585\u001b[0m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1586\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestablish_colors(color, palette, saturation)\n\u001b[1;32m 1587\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_statistic(estimator, ci, n_boot, seed)\n", @@ -588,30 +588,13 @@ }, { "cell_type": "code", - "execution_count": 164, - "id": "1833a922", + "execution_count": 174, + "id": "117ec892", "metadata": {}, - "outputs": [ - { - "ename": "KeyError", - "evalue": "\"['Student ID', 'Gender'] not found in axis\"", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [164]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Eliminating the student id and gender\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m test_data \u001b[38;5;241m=\u001b[39m\u001b[43mtest_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mStudent ID\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/util/_decorators.py:311\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[1;32m 306\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 307\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39marguments),\n\u001b[1;32m 308\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 309\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mstacklevel,\n\u001b[1;32m 310\u001b[0m )\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:4954\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4806\u001b[0m \u001b[38;5;129m@deprecate_nonkeyword_arguments\u001b[39m(version\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, allowed_args\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mself\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabels\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 4807\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdrop\u001b[39m(\n\u001b[1;32m 4808\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4815\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4816\u001b[0m ):\n\u001b[1;32m 4817\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4818\u001b[0m \u001b[38;5;124;03m Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[1;32m 4819\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4952\u001b[0m \u001b[38;5;124;03m weight 1.0 0.8\u001b[39;00m\n\u001b[1;32m 4953\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4954\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4955\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4956\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4957\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4958\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4959\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4960\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4961\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4962\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:4267\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4265\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 4266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4267\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_drop_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[1;32m 4270\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:4311\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[0;34m(self, labels, axis, level, errors, consolidate, only_slice)\u001b[0m\n\u001b[1;32m 4309\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4311\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m \u001b[43maxis\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4312\u001b[0m indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[1;32m 4314\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[1;32m 4315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", - "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6644\u001b[0m, in \u001b[0;36mIndex.drop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 6642\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m 6643\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 6644\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(labels[mask])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6645\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[1;32m 6646\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n", - "\u001b[0;31mKeyError\u001b[0m: \"['Student ID', 'Gender'] not found in axis\"" - ] - } - ], + "outputs": [], "source": [ "#Eliminating the student id and gender\n", - "test_data =test_data.drop(columns=['Student ID', 'Gender'])" + "# test_data =test_data.drop(columns=['Student ID', 'Gender'])" ] }, { @@ -711,7 +694,7 @@ { "cell_type": "code", "execution_count": 56, - "id": "f6c83eba", + "id": "ab742256", "metadata": {}, "outputs": [ { @@ -758,7 +741,7 @@ { "cell_type": "code", "execution_count": 153, - "id": "53e9599a", + "id": "13d7ec58", "metadata": {}, "outputs": [ { @@ -775,7 +758,7 @@ }, { "cell_type": "markdown", - "id": "15bdf93f", + "id": "2ad26ea8", "metadata": {}, "source": [ "# Starting fairness evaluation seciton" @@ -783,7 +766,7 @@ }, { "cell_type": "markdown", - "id": "dd82eb26", + "id": "d971637a", "metadata": {}, "source": [ "# The Classification Report includes precision, recall, and F1-score. The results show that precision for class 1 and 0 are balanced, and the F1(the mean of the precision and recall) is balanced as well for both classes 1 and 0, thus this classification report suggests a well-performing model that has room for further improvments, this conclusion was reached because of the distribution of false positives and false negatives. " @@ -889,7 +872,7 @@ }, { "cell_type": "markdown", - "id": "ac92ce1f", + "id": "e588ee9a", "metadata": {}, "source": [ "# The True positives comparisons for the features: Gender, Age, Major. (Equality of opportunity)\n" @@ -898,7 +881,7 @@ { "cell_type": "code", "execution_count": 76, - "id": "1f4f01de", + "id": "e5ebc31f", "metadata": {}, "outputs": [ { @@ -946,7 +929,7 @@ { "cell_type": "code", "execution_count": 166, - "id": "95c6893f", + "id": "0852ad34", "metadata": {}, "outputs": [ { @@ -985,7 +968,7 @@ { "cell_type": "code", "execution_count": 156, - "id": "741f978e", + "id": "63aeac92", "metadata": {}, "outputs": [ { @@ -1026,7 +1009,7 @@ }, { "cell_type": "markdown", - "id": "9c0fab85", + "id": "6d159d9f", "metadata": {}, "source": [ "# The Precision Scores for Gender, Age and Major (The predicitve Parity where the precision of groups is compared)" @@ -1035,7 +1018,7 @@ { "cell_type": "code", "execution_count": 169, - "id": "905ce3da", + "id": "0adb7e3a", "metadata": {}, "outputs": [ { @@ -1077,7 +1060,7 @@ { "cell_type": "code", "execution_count": 172, - "id": "9f4bfbb8", + "id": "c720f4b0", "metadata": {}, "outputs": [ { @@ -1119,7 +1102,7 @@ { "cell_type": "code", "execution_count": 159, - "id": "d7b73c7b", + "id": "44d6105e", "metadata": {}, "outputs": [ { @@ -1161,7 +1144,7 @@ { "cell_type": "code", "execution_count": 160, - "id": "be49068f", + "id": "80807a3d", "metadata": {}, "outputs": [ { @@ -1199,7 +1182,7 @@ }, { "cell_type": "markdown", - "id": "24b8485b", + "id": "93edb3f5", "metadata": {}, "source": [ "# First Fairness evaluation Startegy/Metric: Group Unaware --> This metric evaluates if the model is feature-agnostic with regard to groups. This is essential to make sure biases based on these characteristics are not reinforced or amplified by the model." @@ -1208,7 +1191,7 @@ { "cell_type": "code", "execution_count": 173, - "id": "c335ceac", + "id": "cb3859f1", "metadata": {}, "outputs": [ { @@ -1228,7 +1211,7 @@ { "cell_type": "code", "execution_count": 162, - "id": "fee4e3f6", + "id": "dff3b25a", "metadata": {}, "outputs": [ { @@ -1248,7 +1231,7 @@ { "cell_type": "code", "execution_count": 163, - "id": "c198c926", + "id": "988f583d", "metadata": {}, "outputs": [ { @@ -1267,7 +1250,7 @@ }, { "cell_type": "markdown", - "id": "7676a982", + "id": "d537fece", "metadata": {}, "source": [ "#  Demographic Parity measures how evenly a model's favorable predictions are dispersed among various demographic groupings. It aims for comparable success rates for all groups, irrespective of variables like age or gender. By measuring this, the metric makes sure that the model does not behave biasedly in favor of or against any certain demographic segment. Reaching Demographic Parity promotes inclusivity and equal treatment. For a thorough review, fairness indicators must be balanced with the overall performance of the model." @@ -1276,7 +1259,7 @@ { "cell_type": "code", "execution_count": 135, - "id": "6e7ddfd5", + "id": "ba9b527c", "metadata": {}, "outputs": [], "source": [ @@ -1296,7 +1279,7 @@ { "cell_type": "code", "execution_count": 136, - "id": "8ddca0b2", + "id": "c488645a", "metadata": {}, "outputs": [ { @@ -1322,7 +1305,7 @@ { "cell_type": "code", "execution_count": 137, - "id": "6a25b821", + "id": "90c37717", "metadata": {}, "outputs": [ { @@ -1350,7 +1333,7 @@ { "cell_type": "code", "execution_count": 138, - "id": "a93d3a0f", + "id": "985e6ad0", "metadata": {}, "outputs": [ { @@ -1375,7 +1358,7 @@ }, { "cell_type": "markdown", - "id": "fbc4bdb4", + "id": "60487d96", "metadata": {}, "source": [ "# The fairness evaluation's equality odds quantify the distribution of real positive rates among the various demographic categories. By taking into account both true positives and false positives, it seeks to guarantee fairly accurate predictions for various subgroups. The metric is designed to address any differences in model performance by focusing on equalizing the probability of good predictions. Aiming for Equality Odds reduces disparities in expected outcomes between different groups, which advances justice. It should be taken into account for a thorough evaluation of fairness together with the model's overall performance." @@ -1384,7 +1367,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7d7f235c", + "id": "99b4e55b", "metadata": {}, "outputs": [], "source": [ @@ -1394,7 +1377,7 @@ { "cell_type": "code", "execution_count": 132, - "id": "e8a504b1", + "id": "29f5ab66", "metadata": {}, "outputs": [], "source": [ @@ -1420,7 +1403,7 @@ { "cell_type": "code", "execution_count": 133, - "id": "92116453", + "id": "b4aa24a9", "metadata": {}, "outputs": [ { @@ -1460,7 +1443,7 @@ { "cell_type": "code", "execution_count": 130, - "id": "e44fbf21", + "id": "4d265ea9", "metadata": {}, "outputs": [ { @@ -1497,7 +1480,7 @@ { "cell_type": "code", "execution_count": 127, - "id": "2a267478", + "id": "7ee72e70", "metadata": {}, "outputs": [ { @@ -1523,7 +1506,7 @@ { "cell_type": "code", "execution_count": null, - "id": "26ff0af4", + "id": "26197703", "metadata": {}, "outputs": [], "source": []