diff --git a/P4/Jupyter_Notebook_Setup_P4.ipynb b/P4/Jupyter_Notebook_Setup_P4.ipynb index 7c52fad9..091e9587 100644 --- a/P4/Jupyter_Notebook_Setup_P4.ipynb +++ b/P4/Jupyter_Notebook_Setup_P4.ipynb @@ -321,10 +321,7 @@ "id": "79228d8e", "metadata": {}, "outputs": [], - "source": [ - "#Eliminating the student id and gender\n", - "test_data =test_data.drop(columns=['Student ID', 'Gender'])" - ] + "source": [] }, { "cell_type": "code", @@ -357,7 +354,7 @@ { "cell_type": "code", "execution_count": 45, - "id": "aa56f0ae", + "id": "53ef1752", "metadata": {}, "outputs": [ { @@ -404,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 152, + "execution_count": 165, "id": "d27a24a9", "metadata": {}, "outputs": [ @@ -415,7 +412,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [152]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Plotting the distribution of the test dataset (Gender)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m plt\u001b[38;5;241m.\u001b[39mylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistribution\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxticks(rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m90\u001b[39m) \n", + "Input \u001b[0;32mIn [165]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Plotting the distribution of the test dataset (Gender)\u001b[39;00m\n\u001b[1;32m 2\u001b[0m plt\u001b[38;5;241m.\u001b[39mtitle(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGender\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m \u001b[43msns\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcountplot\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m plt\u001b[38;5;241m.\u001b[39mylabel(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mDistribution\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 5\u001b[0m plt\u001b[38;5;241m.\u001b[39mxticks(rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m90\u001b[39m) \n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/_decorators.py:46\u001b[0m, in \u001b[0;36m_deprecate_positional_args..inner_f\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 36\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 37\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPass the following variable\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m as \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124mkeyword arg\u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m: \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFrom version 0.12, the only valid positional argument \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m\n\u001b[1;32m 44\u001b[0m )\n\u001b[1;32m 45\u001b[0m kwargs\u001b[38;5;241m.\u001b[39mupdate({k: arg \u001b[38;5;28;01mfor\u001b[39;00m k, arg \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(sig\u001b[38;5;241m.\u001b[39mparameters, args)})\n\u001b[0;32m---> 46\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:3598\u001b[0m, in \u001b[0;36mcountplot\u001b[0;34m(x, y, hue, data, order, hue_order, orient, color, palette, saturation, dodge, ax, **kwargs)\u001b[0m\n\u001b[1;32m 3595\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m x \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;129;01mand\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 3596\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot pass values for both `x` and `y`\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 3598\u001b[0m plotter \u001b[38;5;241m=\u001b[39m \u001b[43m_CountPlotter\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3599\u001b[0m \u001b[43m \u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3600\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mci\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_boot\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mseed\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3601\u001b[0m \u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpalette\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msaturation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3602\u001b[0m \u001b[43m \u001b[49m\u001b[43merrcolor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrwidth\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcapsize\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdodge\u001b[49m\n\u001b[1;32m 3603\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 3605\u001b[0m plotter\u001b[38;5;241m.\u001b[39mvalue_label \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcount\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 3607\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ax \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/seaborn/categorical.py:1584\u001b[0m, in \u001b[0;36m_BarPlotter.__init__\u001b[0;34m(self, x, y, hue, data, order, hue_order, estimator, ci, n_boot, units, seed, orient, color, palette, saturation, errcolor, errwidth, capsize, dodge)\u001b[0m\n\u001b[1;32m 1579\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, x, y, hue, data, order, hue_order,\n\u001b[1;32m 1580\u001b[0m estimator, ci, n_boot, units, seed,\n\u001b[1;32m 1581\u001b[0m orient, color, palette, saturation, errcolor,\n\u001b[1;32m 1582\u001b[0m errwidth, capsize, dodge):\n\u001b[1;32m 1583\u001b[0m \u001b[38;5;124;03m\"\"\"Initialize the plotter.\"\"\"\u001b[39;00m\n\u001b[0;32m-> 1584\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mestablish_variables\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43morient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1585\u001b[0m \u001b[43m \u001b[49m\u001b[43morder\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhue_order\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43munits\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1586\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestablish_colors(color, palette, saturation)\n\u001b[1;32m 1587\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mestimate_statistic(estimator, ci, n_boot, seed)\n", @@ -589,6 +586,34 @@ "plt.show()\n" ] }, + { + "cell_type": "code", + "execution_count": 164, + "id": "1833a922", + "metadata": {}, + "outputs": [ + { + "ename": "KeyError", + "evalue": "\"['Student ID', 'Gender'] not found in axis\"", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", + "Input \u001b[0;32mIn [164]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#Eliminating the student id and gender\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m test_data \u001b[38;5;241m=\u001b[39m\u001b[43mtest_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mStudent ID\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/util/_decorators.py:311\u001b[0m, in \u001b[0;36mdeprecate_nonkeyword_arguments..decorate..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 305\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(args) \u001b[38;5;241m>\u001b[39m num_allow_args:\n\u001b[1;32m 306\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 307\u001b[0m msg\u001b[38;5;241m.\u001b[39mformat(arguments\u001b[38;5;241m=\u001b[39marguments),\n\u001b[1;32m 308\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 309\u001b[0m stacklevel\u001b[38;5;241m=\u001b[39mstacklevel,\n\u001b[1;32m 310\u001b[0m )\n\u001b[0;32m--> 311\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:4954\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4806\u001b[0m \u001b[38;5;129m@deprecate_nonkeyword_arguments\u001b[39m(version\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, allowed_args\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mself\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlabels\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 4807\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdrop\u001b[39m(\n\u001b[1;32m 4808\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4815\u001b[0m errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 4816\u001b[0m ):\n\u001b[1;32m 4817\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 4818\u001b[0m \u001b[38;5;124;03m Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[1;32m 4819\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 4952\u001b[0m \u001b[38;5;124;03m weight 1.0 0.8\u001b[39;00m\n\u001b[1;32m 4953\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 4954\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4955\u001b[0m \u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4956\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4957\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4958\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4959\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4960\u001b[0m \u001b[43m \u001b[49m\u001b[43minplace\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minplace\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4961\u001b[0m \u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4962\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:4267\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4265\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 4266\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4267\u001b[0m obj \u001b[38;5;241m=\u001b[39m \u001b[43mobj\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_drop_axis\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4269\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[1;32m 4270\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/generic.py:4311\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[0;34m(self, labels, axis, level, errors, consolidate, only_slice)\u001b[0m\n\u001b[1;32m 4309\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4310\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4311\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m \u001b[43maxis\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdrop\u001b[49m\u001b[43m(\u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4312\u001b[0m indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[1;32m 4314\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[1;32m 4315\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", + "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/indexes/base.py:6644\u001b[0m, in \u001b[0;36mIndex.drop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 6642\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m 6643\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 6644\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlist\u001b[39m(labels[mask])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 6645\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[1;32m 6646\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n", + "\u001b[0;31mKeyError\u001b[0m: \"['Student ID', 'Gender'] not found in axis\"" + ] + } + ], + "source": [ + "#Eliminating the student id and gender\n", + "test_data =test_data.drop(columns=['Student ID', 'Gender'])" + ] + }, { "cell_type": "code", "execution_count": 19, @@ -686,7 +711,7 @@ { "cell_type": "code", "execution_count": 56, - "id": "f8048f5e", + "id": "f6c83eba", "metadata": {}, "outputs": [ { @@ -733,7 +758,7 @@ { "cell_type": "code", "execution_count": 153, - "id": "72c1fc78", + "id": "53e9599a", "metadata": {}, "outputs": [ { @@ -749,21 +774,19 @@ ] }, { - "cell_type": "code", - "execution_count": 154, - "id": "59a603ff", + "cell_type": "markdown", + "id": "15bdf93f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Starting fairness evaluation seciton\n" - ] - } - ], "source": [ - "print (\"Starting fairness evaluation seciton\")" + "# Starting fairness evaluation seciton" + ] + }, + { + "cell_type": "markdown", + "id": "dd82eb26", + "metadata": {}, + "source": [ + "# The Classification Report includes precision, recall, and F1-score. The results show that precision for class 1 and 0 are balanced, and the F1(the mean of the precision and recall) is balanced as well for both classes 1 and 0, thus this classification report suggests a well-performing model that has room for further improvments, this conclusion was reached because of the distribution of false positives and false negatives. " ] }, { @@ -789,7 +812,8 @@ } ], "source": [ - "# Classification Report\n", + "\n", + "\n", "from sklearn.metrics import classification_report\n", "report = classification_report(Y_true, prediction)\n", "print(report)\n", @@ -864,19 +888,17 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "3b8f50d5", + "cell_type": "markdown", + "id": "ac92ce1f", "metadata": {}, - "outputs": [], "source": [ - "\n" + "# The True positives comparisons for the features: Gender, Age, Major. (Equality of opportunity)\n" ] }, { "cell_type": "code", "execution_count": 76, - "id": "85ec00cd", + "id": "1f4f01de", "metadata": {}, "outputs": [ { @@ -923,8 +945,8 @@ }, { "cell_type": "code", - "execution_count": 155, - "id": "c5b9baa3", + "execution_count": 166, + "id": "95c6893f", "metadata": {}, "outputs": [ { @@ -949,6 +971,8 @@ "prediction = model.predict(x_test)\n", "\n", "# Add the 'Prediction' column to evaluation_using_major\n", + "evaluation_using_major = test_data.copy()\n", + "\n", "evaluation_using_major['Prediction'] = prediction\n", "\n", "# Filterring rows where 'Good Candidate' is 1 \n", @@ -961,7 +985,7 @@ { "cell_type": "code", "execution_count": 156, - "id": "665c2f28", + "id": "741f978e", "metadata": {}, "outputs": [ { @@ -1000,10 +1024,18 @@ "print(tpr_age)\n" ] }, + { + "cell_type": "markdown", + "id": "9c0fab85", + "metadata": {}, + "source": [ + "# The Precision Scores for Gender, Age and Major (The predicitve Parity where the precision of groups is compared)" + ] + }, { "cell_type": "code", - "execution_count": 157, - "id": "fbd2b374", + "execution_count": 169, + "id": "905ce3da", "metadata": {}, "outputs": [ { @@ -1029,6 +1061,8 @@ "prediction = model.predict(x_test) \n", "\n", "# Add the 'Prediction' column to evaluation_using_major\n", + "evaluation_using_major = test_data.copy()\n", + "\n", "evaluation_using_major['Prediction'] = prediction\n", "\n", "# Calculate precision by 'Major'\n", @@ -1042,8 +1076,8 @@ }, { "cell_type": "code", - "execution_count": 158, - "id": "13831f9d", + "execution_count": 172, + "id": "9f4bfbb8", "metadata": {}, "outputs": [ { @@ -1053,7 +1087,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [158]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 7\u001b[0m evaluation_using_major[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m prediction\n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# Calculate precision by 'Major'\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m precision_major \u001b[38;5;241m=\u001b[39m \u001b[43mevaluation_using_major\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: precision_score(x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGood Candidate\u001b[39m\u001b[38;5;124m'\u001b[39m], x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m], zero_division\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 12\u001b[0m )\n\u001b[1;32m 14\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision by Gender:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 15\u001b[0m \u001b[38;5;28mprint\u001b[39m(precision_gender)\n", + "Input \u001b[0;32mIn [172]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 9\u001b[0m evaluation_using_gender[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m prediction\n\u001b[1;32m 11\u001b[0m \u001b[38;5;66;03m# Calculate precision by 'Major'\u001b[39;00m\n\u001b[0;32m---> 12\u001b[0m precision_gender \u001b[38;5;241m=\u001b[39m \u001b[43mevaluation_using_gender\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgroupby\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mGender\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mapply(\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mlambda\u001b[39;00m x: precision_score(x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mGood Candidate\u001b[39m\u001b[38;5;124m'\u001b[39m], x[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mPrediction\u001b[39m\u001b[38;5;124m'\u001b[39m], zero_division\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 14\u001b[0m )\n\u001b[1;32m 16\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision by Gender:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28mprint\u001b[39m(precision_gender)\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/frame.py:7718\u001b[0m, in \u001b[0;36mDataFrame.groupby\u001b[0;34m(self, by, axis, level, as_index, sort, group_keys, squeeze, observed, dropna)\u001b[0m\n\u001b[1;32m 7713\u001b[0m axis \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n\u001b[1;32m 7715\u001b[0m \u001b[38;5;66;03m# https://github.com/python/mypy/issues/7642\u001b[39;00m\n\u001b[1;32m 7716\u001b[0m \u001b[38;5;66;03m# error: Argument \"squeeze\" to \"DataFrameGroupBy\" has incompatible type\u001b[39;00m\n\u001b[1;32m 7717\u001b[0m \u001b[38;5;66;03m# \"Union[bool, NoDefault]\"; expected \"bool\"\u001b[39;00m\n\u001b[0;32m-> 7718\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mDataFrameGroupBy\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 7719\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7720\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mby\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7721\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7722\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7723\u001b[0m \u001b[43m \u001b[49m\u001b[43mas_index\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mas_index\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7724\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7725\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroup_keys\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroup_keys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7726\u001b[0m \u001b[43m \u001b[49m\u001b[43msqueeze\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msqueeze\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# type: ignore[arg-type]\u001b[39;49;00m\n\u001b[1;32m 7727\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7728\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7729\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/groupby.py:882\u001b[0m, in \u001b[0;36mGroupBy.__init__\u001b[0;34m(self, obj, keys, axis, level, grouper, exclusions, selection, as_index, sort, group_keys, squeeze, observed, mutated, dropna)\u001b[0m\n\u001b[1;32m 879\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m grouper \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 880\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgroupby\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgrouper\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m get_grouper\n\u001b[0;32m--> 882\u001b[0m grouper, exclusions, obj \u001b[38;5;241m=\u001b[39m \u001b[43mget_grouper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[43m \u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 884\u001b[0m \u001b[43m \u001b[49m\u001b[43mkeys\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 885\u001b[0m \u001b[43m \u001b[49m\u001b[43maxis\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maxis\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 886\u001b[0m \u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlevel\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 887\u001b[0m \u001b[43m \u001b[49m\u001b[43msort\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msort\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 888\u001b[0m \u001b[43m \u001b[49m\u001b[43mobserved\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mobserved\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[43m \u001b[49m\u001b[43mmutated\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmutated\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 890\u001b[0m \u001b[43m \u001b[49m\u001b[43mdropna\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdropna\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 891\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 893\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mobj \u001b[38;5;241m=\u001b[39m obj\n\u001b[1;32m 894\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxis \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_get_axis_number(axis)\n", "File \u001b[0;32m~/miniconda3/lib/python3.9/site-packages/pandas/core/groupby/grouper.py:882\u001b[0m, in \u001b[0;36mget_grouper\u001b[0;34m(obj, key, axis, level, sort, observed, mutated, validate, dropna)\u001b[0m\n\u001b[1;32m 880\u001b[0m in_axis, level, gpr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, gpr, \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 881\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 882\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(gpr)\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(gpr, Grouper) \u001b[38;5;129;01mand\u001b[39;00m gpr\u001b[38;5;241m.\u001b[39mkey \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 884\u001b[0m \u001b[38;5;66;03m# Add key to exclusions\u001b[39;00m\n\u001b[1;32m 885\u001b[0m exclusions\u001b[38;5;241m.\u001b[39madd(gpr\u001b[38;5;241m.\u001b[39mkey)\n", @@ -1068,11 +1102,13 @@ "x_test = test_data.drop(columns=['Good Candidate'], axis=1)\n", "prediction = model.predict(x_test) \n", "\n", - "# Add the 'Prediction' column to evaluation_using_major\n", - "evaluation_using_major['Prediction'] = prediction\n", + "# Add the 'Prediction' column\n", + "evaluation_using_gender = test_data.copy()\n", + "\n", + "evaluation_using_gender['Prediction'] = prediction\n", "\n", "# Calculate precision by 'Major'\n", - "precision_major = evaluation_using_major.groupby('Gender').apply(\n", + "precision_gender = evaluation_using_gender.groupby('Gender').apply(\n", " lambda x: precision_score(x['Good Candidate'], x['Prediction'], zero_division=0)\n", ")\n", "\n", @@ -1083,7 +1119,7 @@ { "cell_type": "code", "execution_count": 159, - "id": "9f37cb6b", + "id": "d7b73c7b", "metadata": {}, "outputs": [ { @@ -1125,7 +1161,7 @@ { "cell_type": "code", "execution_count": 160, - "id": "10609ae7", + "id": "be49068f", "metadata": {}, "outputs": [ { @@ -1162,17 +1198,17 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "85ce33a4", + "cell_type": "markdown", + "id": "24b8485b", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "# First Fairness evaluation Startegy/Metric: Group Unaware --> This metric evaluates if the model is feature-agnostic with regard to groups. This is essential to make sure biases based on these characteristics are not reinforced or amplified by the model." + ] }, { "cell_type": "code", - "execution_count": 161, - "id": "d5ad5823", + "execution_count": 173, + "id": "c335ceac", "metadata": {}, "outputs": [ { @@ -1185,14 +1221,14 @@ ], "source": [ "age_checking = 'Age' in x_test.columns\n", - "group_unaware_status_age = \"Group Unaware\" if not is_age_used else \"Group Aware\"\n", + "group_unaware_status_age = \"Group Unaware\" if not age_checking else \"Group Aware\"\n", "print(\"The model is \" + group_unaware_status_age + \" for Age attribute.\")\n" ] }, { "cell_type": "code", "execution_count": 162, - "id": "7a9a4f21", + "id": "fee4e3f6", "metadata": {}, "outputs": [ { @@ -1212,7 +1248,7 @@ { "cell_type": "code", "execution_count": 163, - "id": "1d7a200f", + "id": "c198c926", "metadata": {}, "outputs": [ { @@ -1229,10 +1265,18 @@ "print(\"The model is \" + group_unaware_status_major + \" for Gender attribute.\")" ] }, + { + "cell_type": "markdown", + "id": "7676a982", + "metadata": {}, + "source": [ + "#  Demographic Parity measures how evenly a model's favorable predictions are dispersed among various demographic groupings. It aims for comparable success rates for all groups, irrespective of variables like age or gender. By measuring this, the metric makes sure that the model does not behave biasedly in favor of or against any certain demographic segment. Reaching Demographic Parity promotes inclusivity and equal treatment. For a thorough review, fairness indicators must be balanced with the overall performance of the model." + ] + }, { "cell_type": "code", "execution_count": 135, - "id": "abd88986", + "id": "6e7ddfd5", "metadata": {}, "outputs": [], "source": [ @@ -1252,7 +1296,7 @@ { "cell_type": "code", "execution_count": 136, - "id": "f021f671", + "id": "8ddca0b2", "metadata": {}, "outputs": [ { @@ -1278,7 +1322,7 @@ { "cell_type": "code", "execution_count": 137, - "id": "8d6135bc", + "id": "6a25b821", "metadata": {}, "outputs": [ { @@ -1306,7 +1350,7 @@ { "cell_type": "code", "execution_count": 138, - "id": "422045d2", + "id": "a93d3a0f", "metadata": {}, "outputs": [ { @@ -1330,53 +1374,17 @@ ] }, { - "cell_type": "code", - "execution_count": 112, - "id": "c036b656", + "cell_type": "markdown", + "id": "fbc4bdb4", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Good candidates for men (predicted): 0.6\n", - "Good candidates for women, predicted: 0.6\n" - ] - } - ], "source": [ - "# #Demographic Parity\n", - "# import numpy as np\n", - "\n", - "# # Example: Define predicted and actual values for men\n", - "# predicted_men = [1, 0, 1, 1, 0]\n", - "# actual_men = [1, 1, 0, 1, 0]\n", - "\n", - "# # Function to calculate positive rate\n", - "# def calculate_positive_rate(predictions, actuals):\n", - "# numpy_predictions = np.array(predictions)\n", - "# positive_count = np.sum(numpy_predictions == 1)\n", - "# total_count = len(predictions)\n", - "# return positive_count / total_count\n", - "\n", - "# # Calculate and print positive rates for men\n", - "# prediction_men = calculate_positive_rate(predicted_men, actual_men)\n", - "# print(f\"Good candidates for men (predicted): {prediction_men}\")\n", - "\n", - "# # Example: Define predicted and actual values for women\n", - "# predicted_women = [0, 1, 0, 1, 1]\n", - "# actual_women = [1, 0, 1, 0, 1]\n", - "\n", - "# # Calculate and print positive rates for women\n", - "# prediction_women = calculate_positive_rate(predicted_women, actual_women)\n", - "# print(f\"Good candidates for women, predicted: {prediction_women}\")\n", - "# x\n" + "# The fairness evaluation's equality odds quantify the distribution of real positive rates among the various demographic categories. By taking into account both true positives and false positives, it seeks to guarantee fairly accurate predictions for various subgroups. The metric is designed to address any differences in model performance by focusing on equalizing the probability of good predictions. Aiming for Equality Odds reduces disparities in expected outcomes between different groups, which advances justice. It should be taken into account for a thorough evaluation of fairness together with the model's overall performance." ] }, { "cell_type": "code", "execution_count": null, - "id": "545eae10", + "id": "7d7f235c", "metadata": {}, "outputs": [], "source": [ @@ -1386,7 +1394,7 @@ { "cell_type": "code", "execution_count": 132, - "id": "ac95da67", + "id": "e8a504b1", "metadata": {}, "outputs": [], "source": [ @@ -1412,7 +1420,7 @@ { "cell_type": "code", "execution_count": 133, - "id": "fb06baa5", + "id": "92116453", "metadata": {}, "outputs": [ { @@ -1452,7 +1460,7 @@ { "cell_type": "code", "execution_count": 130, - "id": "55a667a3", + "id": "e44fbf21", "metadata": {}, "outputs": [ { @@ -1489,7 +1497,7 @@ { "cell_type": "code", "execution_count": 127, - "id": "dce69565", + "id": "2a267478", "metadata": {}, "outputs": [ { @@ -1515,7 +1523,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4099f31e", + "id": "26ff0af4", "metadata": {}, "outputs": [], "source": []