diff --git a/lectures/Day_4.ipynb b/lectures/Day_4.ipynb index eab1520..9022de9 100644 --- a/lectures/Day_4.ipynb +++ b/lectures/Day_4.ipynb @@ -91,7 +91,7 @@ "metadata": {}, "outputs": [], "source": [ - "print(sorted(['2000', '30', '100']))" + "print(sorted(['2000', '30', '100', '5', '301']))" ] }, { @@ -104,7 +104,7 @@ }, "outputs": [], "source": [ - "ord('2')" + "ord('3')" ] }, { @@ -476,7 +476,7 @@ }, "outputs": [], "source": [ - "bool('')" + "bool([])" ] }, { @@ -796,7 +796,7 @@ "HOST = 'global'\n", "\n", "def show_host():\n", - " print(f'HOST inside the function = {HOST}')\n", + " print('HOST inside the function = {HOST}')\n", "\n", "show_host()\n", "print(f'HOST outside the function = {HOST}')\n" @@ -824,10 +824,12 @@ "def change_host():\n", " HOST = 'local'\n", " print(f'HOST inside the function = {HOST}')\n", - "\n", + "def app2():\n", + " print(HOST)\n", "print(f'HOST outside the function before change = {HOST}')\n", "change_host()\n", - "print(f'HOST outside the function after change = {HOST}')\n" + "print(f'HOST outside the function after change = {HOST}')\n", + "app2()" ] }, { @@ -895,7 +897,7 @@ } }, "source": [ - "Will the global variable never to changed by function?" + "### Will the global variable never be changed by a function?" 
] }, { @@ -957,7 +959,7 @@ "\n", "count1 = cytosine_count('CATATTAC')\n", "count2 = cytosine_count('tagtag')\n", - "print(count1, count2)" + "print(count1, \"\\n\", count2)" ] }, { @@ -1033,8 +1035,8 @@ "def foo():\n", " do_nothing = 1\n", "\n", - "r = foo()\n", - "print(f'Return value of foo() = {r}')" + "result = foo()\n", + "print(f'Return value of foo() = {result}')" ] }, { @@ -1048,6 +1050,67 @@ "- Use `return` for all values that you might want to use later in your program" ] }, + { + "cell_type": "markdown", + "metadata": { + "cell_marker": "'''", + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "### Small detour: Python's value for missing values: `None`\n", + "\n", + "- Default value for optional arguments\n", + "- Implicit return value of functions without a `return` statement\n", + "- `None` is `None`, not anything else" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "None == 0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "None == False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "None == ''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "bool(None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "type(None)" + ] + }, { "cell_type": "markdown", "metadata": { @@ -1287,12 +1350,12 @@ "metadata": {}, "outputs": [], "source": [ - "def format_sentence(subject, value, end):\n", + "def format_sentence(subject, value = 13, end = \"....\"):\n", " return 'The ' + subject + ' is ' + value + end\n", "\n", "print(format_sentence('lecture', 'ongoing', '.'))\n", "\n", - "print(format_sentence('lecture', 'ongoing', end='!'))\n", + "print(format_sentence('lecture', '!', value='ongoing'))\n", "\n", 
"print(format_sentence(subject='lecture', value='ongoing', end='...'))" ] @@ -1345,7 +1408,7 @@ "metadata": {}, "outputs": [], "source": [ - "def format_sentence(subject, value, end='.', second_value=None):\n", + "def format_sentence(subject, value, end='.', second_value=0):\n", " if second_value is None:\n", " return 'The ' + subject + ' is ' + value + end\n", " else:\n", @@ -1357,67 +1420,6 @@ " second_value='self-referential', end='!'))" ] }, - { - "cell_type": "markdown", - "metadata": { - "cell_marker": "'''", - "slideshow": { - "slide_type": "slide" - } - }, - "source": [ - "### Small detour: Python's value for missing values: `None`\n", - "\n", - "- Default value for optional arguments\n", - "- Implicit return value of functions without a `return` statement\n", - "- `None` is `None`, not anything else" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "None == 0" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "None == False" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "None == ''" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "bool(None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "type(None)" - ] - }, { "cell_type": "markdown", "metadata": { @@ -1520,9 +1522,7 @@ "\n", "- Notebook Day_4_Exercise_1 (~30 minutes)\n", "- Go to Canvas, `Modules -> Day 4 -> Exercise 1 - day 4`\n", - "- Quiz. 
Go to Canvas, `Modules -> Day 4 -> PyQuiz 4.1`\n", "\n", - "- Lunch break\n", "\n", "- Extra reading:\n", " - https://realpython.com/python-kwargs-and-args/\n", @@ -1636,7 +1636,7 @@ "source": [ "import os\n", "\n", - "os.system(\"date\")" + "os.system(\"ls\")" ] }, { @@ -1648,7 +1648,7 @@ } }, "source": [ - "How to find the right module and instructions?\n", + "### How to find the right module and instructions?\n", "\n", "- Look at the [module index](https://docs.python.org/3/py-modindex.html) for Python standard modules\n", "- Search [PyPI](http://pypi.org)\n", @@ -1689,7 +1689,8 @@ "metadata": {}, "outputs": [], "source": [ - "text = 'Programming,is,cool'" + "text = 'Programming,is,cool'\n", + "text.split(sep=',')" ] }, { @@ -1937,7 +1938,7 @@ "source": [ "def process_file(filename, chrom, pos):\n", " \"\"\"\n", - " Read a vcf file, search for lines matching\n", + " Read a very large vcf file, search for lines matching\n", " chromosome chrom and position pos.\n", "\n", " Print the genotypes of the matching lines.\n", @@ -2116,7 +2117,10 @@ } }, "outputs": [], - "source": [] + "source": [ + "from files import mywork\n", + "mywork.pipeline([\"accctt\", \"gaccct\"])" + ] }, { "cell_type": "markdown", @@ -2134,6 +2138,13 @@ "https://www.python.org/dev/peps/pep-0008/?#comments" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Lunch" + ] + }, { "cell_type": "markdown", "metadata": { @@ -2142,13 +2153,11 @@ } }, "source": [ - "### Quiz time!\n", + "### Quiz time\n", "\n", "Go to Canvas, `Modules -> Day 4 -> PyQuiz 4.1`\n", "\n", - "~10 min\n", - "\n", - "Lunch after quiz" + "~10 min" ] }, { @@ -2404,15 +2413,6 @@ "#### Orange tree data" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!cat ../downloads/Orange_1.tsv" - ] - }, { "cell_type": "code", "execution_count": null, @@ -2519,7 +2519,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.std()" + "df.max()" ] }, { @@ -2578,7 
+2578,8 @@ }, "outputs": [], "source": [ - "df.age" + "df_new = df.age\n", + "df_new " ] }, { @@ -2680,7 +2681,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.loc[[1, 3]] # select row 2 and 4" + "df.loc[[1, 3, 0]] # select rows 2, 4 and 1" ] }, { @@ -2709,7 +2710,7 @@ "metadata": {}, "outputs": [], "source": [ - "df.loc[[1, 3], ['age', 'height']]" + "df.loc[[0], ['age']]" ] }, { @@ -2755,7 +2756,7 @@ }, "outputs": [], "source": [ - "df.loc[1:3, ['age']].mean()" + "df.loc[1:10]" ] }, { @@ -2801,6 +2802,7 @@ "source": [ "import math\n", "df['radius'] = df['circumference'] / 2.0 / math.pi\n", + "\n", "df" ] }, @@ -2836,7 +2838,7 @@ " 'age': [1,2,3,4],\n", " 'circumference': [2,3,5,10],\n", " 'height': [30, 35, 40, 50]\n", - "})" + "})\n" ] }, { @@ -2885,7 +2887,7 @@ }, "outputs": [], "source": [ - "pd.concat([df1, df2], axis=1)" + "pd.concat([df2, df1], axis=0)" ] }, { @@ -2914,17 +2916,6 @@ "### Slightly bigger data frame of orange trees " ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "!head -n 10 ../downloads/Orange.tsv" - ] - }, { "cell_type": "code", "execution_count": null, @@ -2936,7 +2927,7 @@ "outputs": [], "source": [ "df = pd.read_table('../downloads/Orange.tsv')\n", - "df.iloc[0:5] # can also use .head()" + "df.head(3) # can also use .iloc[0:3]" ] }, { @@ -2971,9 +2962,7 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "scrolled": true - }, + "metadata": {}, "outputs": [], "source": [ "df[df.age > 500]" @@ -2985,7 +2974,7 @@ "metadata": {}, "outputs": [], "source": [ - "df[(df.age > 500) & (df.circumference < 100)]" + "df[(df.age > 500) & (df.circumference < 100) ]" ] }, { @@ -3026,7 +3015,7 @@ }, "source": [ "### Small exercise 1\n", - "* find the maximal circumference and then filter the data frame by it" + "* Find the maximal circumference and then filter the data frame by it" ] }, { @@ -3035,7 +3024,7 @@ "metadata": {}, "outputs": [], "source": [ - 
"df[df.circumference == df.circumference.max() ]" + "df" ] }, { @@ -3173,7 +3162,7 @@ }, "outputs": [], "source": [ - "small_df.plot(x='age', y='height', kind='line') # plot the relationship of age and height\n", + "small_df.plot(x='age', y='circumference', kind='line') # plot the relationship of age and height\n", "# try with other types of plots, e.g. scatter" ] }, @@ -3186,7 +3175,7 @@ } }, "source": [ - "### What if no plots shows up?" + "### Tips: what if no plots shows up?" ] }, { @@ -3296,7 +3285,7 @@ "metadata": {}, "outputs": [], "source": [ - "small_df.plot(kind='hist', y = 'age')" + "small_df.plot(kind='hist', y = 'age', fontsize=18)" ] }, {