From 9877e8452e2a6e065bf66769a9f00ec129a1c5ec Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Thu, 4 Mar 2021 16:16:03 +0100 Subject: [PATCH 1/6] Added a question in the statistics I figured that we can also use the year column of US baby names dataset for statistics as well. I created a question to find the year with highest number of baby names --- 06_Stats/US_Baby_Names/Exercises.ipynb | 74 ++- .../Exercises_with_solutions.ipynb | 421 ++++++------------ 06_Stats/US_Baby_Names/Solutions.ipynb | 145 +++--- 3 files changed, 260 insertions(+), 380 deletions(-) diff --git a/06_Stats/US_Baby_Names/Exercises.ipynb b/06_Stats/US_Baby_Names/Exercises.ipynb index f9f79e7ae..6686c797c 100644 --- a/06_Stats/US_Baby_Names/Exercises.ipynb +++ b/06_Stats/US_Baby_Names/Exercises.ipynb @@ -23,9 +23,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -46,9 +44,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -62,9 +58,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -78,9 +72,21 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 7. What year has the highest number of baby names in the dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, "outputs": [], "source": [] }, @@ -94,9 +100,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -110,9 +114,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -126,9 +128,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -142,9 +142,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -158,9 +156,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -174,9 +170,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -190,9 +184,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -206,9 +198,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] } @@ -216,23 +206,23 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.11" + "pygments_lexer": "ipython3", + "version": "3.9.1" } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } diff --git a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb index f83fecad6..3099ca721 100644 --- a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb +++ b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb @@ -57,13 +57,15 @@ "\n", "RangeIndex: 1016395 entries, 0 to 1016394\n", "Data columns (total 7 columns):\n", - "Unnamed: 0 1016395 non-null int64\n", - "Id 1016395 non-null int64\n", - "Name 1016395 non-null object\n", - "Year 1016395 non-null int64\n", - "Gender 1016395 non-null object\n", - "State 1016395 non-null object\n", - "Count 1016395 non-null int64\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Unnamed: 0 1016395 non-null int64 \n", + " 1 Id 1016395 non-null int64 \n", + " 2 Name 1016395 non-null object\n", + " 3 Year 1016395 non-null int64 \n", + " 4 Gender 1016395 non-null object\n", + " 5 State 1016395 non-null object\n", + " 6 Count 1016395 non-null int64 \n", "dtypes: int64(4), object(3)\n", "memory usage: 54.3+ MB\n" ] @@ -90,6 +92,19 @@ "data": { "text/html": [ "
\n", + "\n", "\n", " \n", " \n", @@ -247,6 +262,19 @@ "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -331,13 +359,41 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Are there more male or female names in the dataset?" + "### Step 7. What year has the highest number of baby names in the dataset?" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Count 2007\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "baby_names.groupby(\"Year\").sum().idxmax()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 6. Are there more male or female names in the dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, "outputs": [ { "data": { @@ -347,7 +403,7 @@ "Name: Gender, dtype: int64" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -365,7 +421,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -379,6 +435,19 @@ "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -425,7 +494,7 @@ "Isabella 204798" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -456,7 +525,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -465,7 +534,7 @@ "17632" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -485,7 +554,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -494,7 +563,7 @@ "'Jacob'" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -516,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -525,7 +594,7 @@ "2578" ] }, - "execution_count": 9, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -543,13 +612,26 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -583,210 +665,10 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", " \n", " \n", " \n", @@ -812,74 +694,24 @@ "" ], "text/plain": [ - " Count\n", - "Name \n", - "Aishani 49\n", - "Alara 49\n", - "Alysse 49\n", - "Ameir 49\n", - "Anely 49\n", - "Antonina 49\n", - "Aveline 49\n", - "Aziah 49\n", - "Baily 49\n", - "Caleah 49\n", - "Carlota 49\n", - "Cristine 49\n", - "Dahlila 49\n", - "Darvin 49\n", - "Deante 49\n", - "Deserae 49\n", - "Devean 49\n", - "Elizah 49\n", - "Emmaly 49\n", - "Emmanuela 49\n", - "Envy 49\n", - "Esli 49\n", - "Fay 49\n", - "Gurshaan 49\n", - "Hareem 49\n", - "Iven 49\n", - "Jaice 49\n", - "Jaiyana 49\n", - "Jamiracle 49\n", - "Jelissa 49\n", - "... ...\n", - "Kyndle 49\n", - "Kynsley 49\n", - "Leylanie 49\n", - "Maisha 49\n", - "Malillany 49\n", - "Mariann 49\n", - "Marquell 49\n", - "Maurilio 49\n", - "Mckynzie 49\n", - "Mehdi 49\n", - "Nabeel 49\n", - "Nalleli 49\n", - "Nassir 49\n", - "Nazier 49\n", - "Nishant 49\n", - "Rebecka 49\n", - "Reghan 49\n", - "Ridwan 49\n", - "Riot 49\n", - "Rubin 49\n", - "Ryatt 49\n", - "Sameera 49\n", - "Sanjuanita 49\n", - "Shalyn 49\n", - "Skylie 49\n", - "Sriram 49\n", - "Trinton 49\n", - "Vita 49\n", - "Yoni 49\n", - "Zuleima 49\n", + " Count\n", + "Name \n", + "Aishani 49\n", + "Alara 49\n", + "Alysse 49\n", + "Ameir 49\n", + "Anely 49\n", + "... ...\n", + "Sriram 49\n", + "Trinton 49\n", + "Vita 49\n", + "Yoni 49\n", + "Zuleima 49\n", "\n", "[66 rows x 1 columns]" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -897,16 +729,16 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "11006.069467891111" + "11006.06946789057" ] }, - "execution_count": 11, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -924,13 +756,26 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", + "\n", "
49
Antonina49
Aveline49
Aziah49
Baily49
Caleah49
Carlota49
Cristine49
Dahlila49
Darvin49
Deante49
Deserae49
Devean49
Elizah49
Emmaly49
Emmanuela49
Envy49
Esli49
Fay49
Gurshaan49
Hareem49
Iven49
Jaice49
Jaiyana49
Jamiracle49
Jelissa49
......
Kyndle49
Kynsley49
Leylanie49
Maisha49
Malillany49
Mariann49
Marquell49
Maurilio49
Mckynzie49
Mehdi49
Nabeel49
Nalleli49
Nassir49
Nazier49
Nishant49
Rebecka49
Reghan49
Ridwan49
Riot49
Rubin49
Ryatt49
Sameera49
Sanjuanita49
Shalyn49
Skylie49
Sriram49
\n", " \n", " \n", @@ -987,7 +832,7 @@ "max 242874.000000" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1014,7 +859,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.3" + "version": "3.9.1" }, "toc": { "base_numbering": 1, diff --git a/06_Stats/US_Baby_Names/Solutions.ipynb b/06_Stats/US_Baby_Names/Solutions.ipynb index 9acac382e..4d90fb2cd 100644 --- a/06_Stats/US_Baby_Names/Solutions.ipynb +++ b/06_Stats/US_Baby_Names/Solutions.ipynb @@ -4,7 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# US - Baby Names" + "# US - Baby Names\n", + "\n", + "Check out [Baby Names Exercises Video Tutorial](https://youtu.be/Daf2QNAy-qA) to watch a data scientist go through the exercises" ] }, { @@ -23,9 +25,7 @@ { "cell_type": "code", "execution_count": 1, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [], "source": [] }, @@ -46,9 +46,7 @@ { "cell_type": "code", "execution_count": 2, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -57,13 +55,15 @@ "\n", "RangeIndex: 1016395 entries, 0 to 1016394\n", "Data columns (total 7 columns):\n", - "Unnamed: 0 1016395 non-null int64\n", - "Id 1016395 non-null int64\n", - "Name 1016395 non-null object\n", - "Year 1016395 non-null int64\n", - "Gender 1016395 non-null object\n", - "State 1016395 non-null object\n", - "Count 1016395 non-null int64\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 Unnamed: 0 1016395 non-null int64 \n", + " 1 Id 1016395 non-null int64 \n", + " 2 Name 1016395 non-null object\n", + " 3 Year 1016395 non-null int64 \n", + " 4 Gender 1016395 non-null object\n", + " 5 State 1016395 non-null object\n", + " 6 Count 1016395 non-null int64 \n", "dtypes: int64(4), object(3)\n", "memory usage: 54.3+ MB\n" ] @@ -81,14 +81,25 @@ { "cell_type": "code", "execution_count": 3, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -238,14 +249,25 @@ { "cell_type": "code", "execution_count": 4, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", + "\n", "
\n", " \n", " \n", @@ -318,6 +340,32 @@ ], "source": [] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Step 7. What year has the highest number of baby names in the dataset?" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Count 2007\n", + "dtype: int64" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [] + }, { "cell_type": "markdown", "metadata": {}, @@ -328,9 +376,7 @@ { "cell_type": "code", "execution_count": 5, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -357,9 +403,7 @@ { "cell_type": "code", "execution_count": 6, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "name": "stdout", @@ -435,9 +479,7 @@ { "cell_type": "code", "execution_count": 7, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -462,9 +504,7 @@ { "cell_type": "code", "execution_count": 8, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -489,9 +529,7 @@ { "cell_type": "code", "execution_count": 9, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -516,9 +554,7 @@ { "cell_type": "code", "execution_count": 10, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -870,9 +906,7 @@ { "cell_type": "code", "execution_count": 11, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -897,9 +931,7 @@ { "cell_type": "code", "execution_count": 12, - "metadata": { - "collapsed": false - }, + "metadata": {}, "outputs": [ { "data": { @@ -972,23 +1004,36 @@ "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [default]", + "display_name": "Python 3", "language": "python", - "name": "python2" + "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.12" + "pygments_lexer": "ipython3", + "version": "3.9.1" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false } }, "nbformat": 4, - "nbformat_minor": 0 + "nbformat_minor": 1 } From 4f4d807ff252f51ec93b9bfb98bd0a4284a3504e Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Thu, 4 Mar 2021 16:21:41 +0100 Subject: [PATCH 2/6] Update Solutions.ipynb --- 06_Stats/US_Baby_Names/Solutions.ipynb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/06_Stats/US_Baby_Names/Solutions.ipynb b/06_Stats/US_Baby_Names/Solutions.ipynb index 4d90fb2cd..9266b9473 100644 --- a/06_Stats/US_Baby_Names/Solutions.ipynb +++ b/06_Stats/US_Baby_Names/Solutions.ipynb @@ -4,9 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# US - Baby Names\n", - "\n", - "Check out [Baby Names Exercises Video Tutorial](https://youtu.be/Daf2QNAy-qA) to watch a data scientist go through the exercises" + "# US - Baby Names" ] }, { From 360f2d8d31592ab58c3b9f808f00415a381650f6 Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Mon, 8 Mar 2021 15:30:22 +0100 Subject: [PATCH 3/6] Added a question to the US baby names statistics exercise I figured that we can also use the year column of US baby names dataset for statistics as well. I created a question to find the year with highest number of baby names. --- 06_Stats/US_Baby_Names/Exercises.ipynb | 18 +++++++++--------- .../Exercises_with_solutions.ipynb | 18 +++++++++--------- 06_Stats/US_Baby_Names/Solutions.ipynb | 18 +++++++++--------- 3 files changed, 27 insertions(+), 27 deletions(-) diff --git a/06_Stats/US_Baby_Names/Exercises.ipynb b/06_Stats/US_Baby_Names/Exercises.ipynb index 6686c797c..f6f75016f 100644 --- a/06_Stats/US_Baby_Names/Exercises.ipynb +++ b/06_Stats/US_Baby_Names/Exercises.ipynb @@ -80,7 +80,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What year has the highest number of baby names in the dataset?" + "### Step 6. What year has the highest number of baby names in the dataset?" ] }, { @@ -94,7 +94,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Is there more male or female names in the dataset?" + "### Step 7. Is there more male or female names in the dataset?" ] }, { @@ -108,7 +108,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Group the dataset by name and assign to names" + "### Step 8. Group the dataset by name and assign to names" ] }, { @@ -122,7 +122,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### Step 9. How many different names exist in the dataset?" ] }, { @@ -136,7 +136,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### Step 10. What is the name with most occurrences?" ] }, { @@ -150,7 +150,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### Step 11. How many different names have the least occurrences?" ] }, { @@ -164,7 +164,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### Step 12. What is the median name occurrence?" ] }, { @@ -178,7 +178,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### Step 13. What is the standard deviation of names?" ] }, { @@ -192,7 +192,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Step 14. Get a summary with the mean, min, max, std and quartiles." ] }, { diff --git a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb index 3099ca721..5ae8e7175 100644 --- a/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb +++ b/06_Stats/US_Baby_Names/Exercises_with_solutions.ipynb @@ -359,7 +359,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What year has the highest number of baby names in the dataset?" + "### Step 6. What year has the highest number of baby names in the dataset?" ] }, { @@ -387,7 +387,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Are there more male or female names in the dataset?" + "### Step 7. Are there more male or female names in the dataset?" ] }, { @@ -416,7 +416,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Group the dataset by name and assign to names" + "### Step 8. Group the dataset by name and assign to names" ] }, { @@ -520,7 +520,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### Step 9. How many different names exist in the dataset?" ] }, { @@ -549,7 +549,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### Step 10. What is the name with most occurrences?" ] }, { @@ -580,7 +580,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### Step 11. How many different names have the least occurrences?" ] }, { @@ -607,7 +607,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### Step 12. What is the median name occurrence?" ] }, { @@ -724,7 +724,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### Step 13. What is the standard deviation of names?" ] }, { @@ -751,7 +751,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Step 14. Get a summary with the mean, min, max, std and quartiles." ] }, { diff --git a/06_Stats/US_Baby_Names/Solutions.ipynb b/06_Stats/US_Baby_Names/Solutions.ipynb index 9266b9473..55569c62e 100644 --- a/06_Stats/US_Baby_Names/Solutions.ipynb +++ b/06_Stats/US_Baby_Names/Solutions.ipynb @@ -342,7 +342,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. What year has the highest number of baby names in the dataset?" + "### Step 6. What year has the highest number of baby names in the dataset?" ] }, { @@ -368,7 +368,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 6. Are there more male or female names in the dataset?" + "### Step 7. Are there more male or female names in the dataset?" ] }, { @@ -395,7 +395,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 7. Group the dataset by name and assign to names" + "### Step 8. Group the dataset by name and assign to names" ] }, { @@ -471,7 +471,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 8. How many different names exist in the dataset?" + "### Step 9. How many different names exist in the dataset?" ] }, { @@ -496,7 +496,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 9. What is the name with most occurrences?" + "### Step 10. What is the name with most occurrences?" ] }, { @@ -521,7 +521,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 10. How many different names have the least occurrences?" + "### Step 11. How many different names have the least occurrences?" ] }, { @@ -546,7 +546,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 11. What is the median name occurrence?" + "### Step 12. What is the median name occurrence?" ] }, { @@ -898,7 +898,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 12. What is the standard deviation of names?" + "### Step 13. What is the standard deviation of names?" ] }, { @@ -923,7 +923,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Step 13. Get a summary with the mean, min, max, std and quartiles." + "### Step 14. Get a summary with the mean, min, max, std and quartiles." ] }, { From beca53e010d15815219216ff6963bd6f8bc88e49 Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Sat, 20 Mar 2021 07:20:27 +0100 Subject: [PATCH 4/6] fixed solution of visualizing chipo exercise --- 07_Visualization/Chipotle/Solutions.ipynb | 57 +++-------------------- 1 file changed, 6 insertions(+), 51 deletions(-) diff --git a/07_Visualization/Chipotle/Solutions.ipynb b/07_Visualization/Chipotle/Solutions.ipynb index 23e7498b3..971e2e109 100644 --- a/07_Visualization/Chipotle/Solutions.ipynb +++ b/07_Visualization/Chipotle/Solutions.ipynb @@ -50,11 +50,7 @@ "execution_count": 2, "metadata": {}, "outputs": [], - "source": [ - "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n", - " \n", - "chipo = pd.read_csv(url, sep = '\\t')" - ] + "source": [] }, { "cell_type": "markdown", @@ -214,9 +210,7 @@ "output_type": "execute_result" } ], - "source": [ - "chipo.head(10)" - ] + "source": [] }, { "cell_type": "markdown", @@ -243,30 +237,7 @@ "output_type": "display_data" } ], - "source": [ - "# get the Series of the names\n", - "x = chipo.item_name\n", - "\n", - "# use the Counter class from collections to create a dictionary with keys(text) and frequency\n", - "letter_counts = Counter(x)\n", - "\n", - "# convert the dictionary to a DataFrame\n", - "df = pd.DataFrame.from_dict(letter_counts, orient='index')\n", - "\n", - "# sort the values from the top to the least value and slice the first 5 items\n", - "df = df[0].sort_values(ascending = True)[45:50]\n", - "\n", - "# create the plot\n", - "df.plot(kind='bar')\n", - "\n", - "# Set the title and labels\n", - "plt.xlabel('Items')\n", - "plt.ylabel('Number of Times Ordered')\n", - "plt.title('Most ordered Chipotle\\'s Items')\n", - "\n", - "# show the plot\n", - "plt.show()" - ] + "source": [] }, { "cell_type": "markdown", @@ -304,23 +275,7 @@ "output_type": "display_data" } ], - "source": [ - "# create a list of prices\n", - "chipo.item_price = [float(value[1:-1]) for value in chipo.item_price] # strip the dollar sign and trailing space\n", - "\n", - "# then groupby the orders and sum\n", - "orders = chipo.groupby('order_id').sum()\n", - "\n", - "# creates the scatterplot\n", - "# plt.scatter(orders.quantity, orders.item_price, s = 50, c = 'green')\n", - "plt.scatter(x = orders.item_price, y = orders.quantity, s = 50, c = 'green')\n", - "\n", - "# Set the title and labels\n", - "plt.xlabel('Order Price')\n", - "plt.ylabel('Items ordered')\n", - "plt.title('Number of items ordered per order price')\n", - "plt.ylim(0)" - ] + "source": [] }, { "cell_type": "markdown", @@ -353,9 +308,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} From 4a20bedfafde3d57107c7328a4139a7a95aac0fb Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Sat, 20 Mar 2021 07:24:21 +0100 Subject: [PATCH 5/6] Revert "fixed solution of visualizing chipo exercise" This reverts commit beca53e010d15815219216ff6963bd6f8bc88e49. --- 07_Visualization/Chipotle/Solutions.ipynb | 57 ++++++++++++++++++++--- 1 file changed, 51 insertions(+), 6 deletions(-) diff --git a/07_Visualization/Chipotle/Solutions.ipynb b/07_Visualization/Chipotle/Solutions.ipynb index 971e2e109..23e7498b3 100644 --- a/07_Visualization/Chipotle/Solutions.ipynb +++ b/07_Visualization/Chipotle/Solutions.ipynb @@ -50,7 +50,11 @@ "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n", + " \n", + "chipo = pd.read_csv(url, sep = '\\t')" + ] }, { "cell_type": "markdown", @@ -210,7 +214,9 @@ "output_type": "execute_result" } ], - "source": [] + "source": [ + "chipo.head(10)" + ] }, { "cell_type": "markdown", @@ -237,7 +243,30 @@ "output_type": "display_data" } ], - "source": [] + "source": [ + "# get the Series of the names\n", + "x = chipo.item_name\n", + "\n", + "# use the Counter class from collections to create a dictionary with keys(text) and frequency\n", + "letter_counts = Counter(x)\n", + "\n", + "# convert the dictionary to a DataFrame\n", + "df = pd.DataFrame.from_dict(letter_counts, orient='index')\n", + "\n", + "# sort the values from the top to the least value and slice the first 5 items\n", + "df = df[0].sort_values(ascending = True)[45:50]\n", + "\n", + "# create the plot\n", + "df.plot(kind='bar')\n", + "\n", + "# Set the title and labels\n", + "plt.xlabel('Items')\n", + "plt.ylabel('Number of Times Ordered')\n", + "plt.title('Most ordered Chipotle\\'s Items')\n", + "\n", + "# show the plot\n", + "plt.show()" + ] }, { "cell_type": "markdown", @@ -275,7 +304,23 @@ "output_type": "display_data" } ], - "source": [] + "source": [ + "# create a list of prices\n", + "chipo.item_price = [float(value[1:-1]) for value in chipo.item_price] # strip the dollar sign and trailing space\n", + "\n", + "# then groupby the orders and sum\n", + "orders = chipo.groupby('order_id').sum()\n", + "\n", + "# creates the scatterplot\n", + "# plt.scatter(orders.quantity, orders.item_price, s = 50, c = 'green')\n", + "plt.scatter(x = orders.item_price, y = orders.quantity, s = 50, c = 'green')\n", + "\n", + "# Set the title and labels\n", + "plt.xlabel('Order Price')\n", + "plt.ylabel('Items ordered')\n", + "plt.title('Number of items ordered per order price')\n", + "plt.ylim(0)" + ] }, { "cell_type": "markdown", @@ -308,9 +353,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file From afe9e2e2c6ff716a20b0b7a69e8db8839d7faec3 Mon Sep 17 00:00:00 2001 From: Lateefah Bello <2019cinnamon@gmail.com> Date: Sat, 20 Mar 2021 07:26:22 +0100 Subject: [PATCH 6/6] fixed visualising chipo exercise --- 07_Visualization/Chipotle/Solutions.ipynb | 57 +++-------------------- 1 file changed, 6 insertions(+), 51 deletions(-) diff --git a/07_Visualization/Chipotle/Solutions.ipynb b/07_Visualization/Chipotle/Solutions.ipynb index 23e7498b3..971e2e109 100644 --- a/07_Visualization/Chipotle/Solutions.ipynb +++ b/07_Visualization/Chipotle/Solutions.ipynb @@ -50,11 +50,7 @@ "execution_count": 2, "metadata": {}, "outputs": [], - "source": [ - "url = 'https://raw.githubusercontent.com/justmarkham/DAT8/master/data/chipotle.tsv'\n", - " \n", - "chipo = pd.read_csv(url, sep = '\\t')" - ] + "source": [] }, { "cell_type": "markdown", @@ -214,9 +210,7 @@ "output_type": "execute_result" } ], - "source": [ - "chipo.head(10)" - ] + "source": [] }, { "cell_type": "markdown", @@ -243,30 +237,7 @@ "output_type": "display_data" } ], - "source": [ - "# get the Series of the names\n", - "x = chipo.item_name\n", - "\n", - "# use the Counter class from collections to create a dictionary with keys(text) and frequency\n", - "letter_counts = Counter(x)\n", - "\n", - "# convert the dictionary to a DataFrame\n", - "df = pd.DataFrame.from_dict(letter_counts, orient='index')\n", - "\n", - "# sort the values from the top to the least value and slice the first 5 items\n", - "df = df[0].sort_values(ascending = True)[45:50]\n", - "\n", - "# create the plot\n", - "df.plot(kind='bar')\n", - "\n", - "# Set the title and labels\n", - "plt.xlabel('Items')\n", - "plt.ylabel('Number of Times Ordered')\n", - "plt.title('Most ordered Chipotle\\'s Items')\n", - "\n", - "# show the plot\n", - "plt.show()" - ] + "source": [] }, { "cell_type": "markdown", @@ -304,23 +275,7 @@ "output_type": "display_data" } ], - "source": [ - "# create a list of prices\n", - "chipo.item_price = [float(value[1:-1]) for value in chipo.item_price] # strip the dollar sign and trailing space\n", - "\n", - "# then groupby the orders and sum\n", - "orders = chipo.groupby('order_id').sum()\n", - "\n", - "# creates the scatterplot\n", - "# plt.scatter(orders.quantity, orders.item_price, s = 50, c = 'green')\n", - "plt.scatter(x = orders.item_price, y = orders.quantity, s = 50, c = 'green')\n", - "\n", - "# Set the title and labels\n", - "plt.xlabel('Order Price')\n", - "plt.ylabel('Items ordered')\n", - "plt.title('Number of items ordered per order price')\n", - "plt.ylim(0)" - ] + "source": [] }, { "cell_type": "markdown", @@ -353,9 +308,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +}