diff --git a/.nojekyll b/.nojekyll
index f1e7790a..c31efbe9 100644
--- a/.nojekyll
+++ b/.nojekyll
@@ -1 +1 @@
-6e8a44e8
\ No newline at end of file
+ea827d25
\ No newline at end of file
diff --git a/index.html b/index.html
index c274ec3e..d0185398 100644
--- a/index.html
+++ b/index.html
@@ -143,7 +143,7 @@

How to work reproducibly with control and structuring of project code, environment and workflow management


-

Updated: 10-10-2024 at 09:14:09 .

+

Updated: 15-10-2024 at 16:39:31 .

diff --git a/pages/conda.html b/pages/conda.html
new file mode 100644
index 00000000..c184c8cd
--- /dev/null
+++ b/pages/conda.html
@@ -0,0 +1,947 @@
+
+ +
+ +
+
+
+

Controlling your environment with Conda

+

How to keep track of your software packages and their versions

+
+
+ + +
+ + +
+
Published
+
+

15-Oct-2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + + +
+

1 Introduction

+

Conda is a package and environment manager. As a package manager it enables you to install a wide range of software and tools using one simple command: conda install. As an environment manager it allows you to create and manage multiple different environments, each with their own set of packages.

+

What are the benefits of using an environment manager? Some examples include the ability to easily run different versions of the same package, have different cross-package dependencies that are otherwise incompatible with each other and, last but not least, easy installation of all the software needed for an analysis.

+

Environments are of particular relevance when making bioinformatics projects reproducible. Full reproducibility requires the ability to recreate the system that was originally used to generate the results. This can, to a large extent, be accomplished by using Conda to make a project environment with specific versions of the packages that are needed in the project. You can read more about Conda here.

+

A Conda package is a compressed tarball containing system-level libraries, Python or other modules, executable programs or other components. Conda keeps track of the dependencies between packages and platforms - this means that when installing a given package, all necessary dependencies will also be installed.

+

Conda packages are typically hosted and downloaded from remote so-called channels. Some widely used channels for general-purpose and bioinformatics packages are conda-forge and Bioconda, respectively. Both of these are community-driven projects, so if you’re missing some package you can contribute to the channel by adding the package to it. When installing a Conda package you specify the package name, version (optional) and channel to download from.
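As a small illustration (the version is just an example), installing a specific FastQC version from the Bioconda channel looks like this:

conda install -c bioconda fastqc=0.12.1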

+

A Conda environment is essentially a directory that is added to your PATH and that contains a specific collection of packages that you have installed. Packages are symlinked between environments to avoid unnecessary duplication.

+
+

Different Conda flavours: You may come across several flavours of Conda. There's Miniconda, which is a minimal installer for Conda. Then there's Anaconda, which is a distribution of not only Conda, but also over 150 scientific Python packages curated by the company of the same name (Anaconda). It's generally better to stick with the Miniconda installation rather than installing 3 GB worth of packages you may not even use. Lastly, there's the Miniforge flavour that we're using here, a community-driven Conda installer that's highly popular within the scientific community.

+

The difference between Miniconda and Miniforge is that the former points to Anaconda's default channel by default (which requires an Anaconda license for commercial purposes), while the latter points to the community-maintained conda-forge channel by default. While Conda is created and owned by Anaconda the company, Conda itself is open source - it's the default channel that is proprietary. The conda-forge and bioconda channels (two of the largest channels outside of default) are community-driven. Confusing? Yes. If you want this information more in-depth you can read this blog post by Anaconda.

+
+
+
+

2 The basics

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up, if you haven’t done so already. Then open up a terminal and go to workshop-reproducible-research/tutorials/conda. Instructions below assume that you are standing in workshop-reproducible-research/tutorials/conda/ unless otherwise specified (e.g. if it says “create a file”, it means save it in workshop-reproducible-research/tutorials/conda/).

+

Let’s assume that you are just about to start a new exciting research project called Project A.

+
+

2.1 Creating Conda environments

+
    +
  • Let’s make our first Conda environment:
  • +
+
conda create -n project_a -c bioconda fastqc
+

This will create an environment called project_a, containing FastQC from the Bioconda channel. Conda will list the packages that will be installed and ask for your confirmation.

+
    +
  • Once it is done, you can activate the environment:
  • +
+
conda activate project_a
+

By default, Conda will add information to your prompt telling you which environment is active.

+
    +
  • To see all your environments you can run:
  • +
+
conda info --envs
+

The active environment will be marked with an asterisk.

+
    +
  • To see the installed packages and their versions in the active environment, run:
  • +
+
conda list
+
    +
  • To save the installed packages to a file, run:
  • +
+
conda env export --from-history > environment.yml
+

Here, --from-history means that only the packages you explicitly requested are exported, not their dependencies. A caveat is that if no version was originally specified, then it is not included in the export file either.
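To compare the two behaviours you can export both ways; a quick sketch (the file names are just suggestions):

conda env export > environment-full.yml             # everything, with exact versions and builds
conda env export --from-history > environment.yml   # only the packages you explicitly asked for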

+
    +
  • Now, deactivate the environment by running conda deactivate.
  • +
  • List all environments again. Which environment is now marked as active?
  • +
  • Try to run FastQC:
  • +
+
fastqc --version
+
    +
  • Did it work? Activate your project_a environment and run the fastqc --version command again. Does it work now?
  • +
+

Hopefully the FastQC software was not found in your base environment (unless you had installed it previously), but worked once your environment was activated.
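If you want to see where the activated environment actually lives, which and echo $PATH are handy (a quick check, assuming project_a is active):

which fastqc   # should point to something like .../envs/project_a/bin/fastqc
echo $PATH     # the environment's bin directory comes first in the search path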

+
+
+

2.2 Adding more packages

+
    +
  • Now, let’s add another package (MultiQC) to our environment using conda install. Make sure that project_a is the active environment first.
  • +
+
conda install -c bioconda multiqc
+
    +
  • If we don’t specify the package version, the latest available version will be installed. What version of MultiQC got installed?
  • +
  • Run the following to see what versions are available:
  • +
+
conda search -c bioconda multiqc
+
    +
  • Now try to install a different version of MultiQC, e.g.:
  • +
+
conda install -c bioconda multiqc=1.13
+

Read the information that Conda displays in the terminal. It probably asks if you want to downgrade the initial MultiQC installation to the one specified here (1.13 in the example). You can only have one version of a given package in a given environment.

+

Let’s assume that you will have sequencing data in your Project A, and want to use the latest BBMap software to align your reads.

+
    +
  • Find out what versions of BBMap are available in the Bioconda channel using conda search -c bioconda bbmap.
  • +
  • Now install the latest available version of BBMap in your project_a environment.
  • +
+

Let’s further assume that you have an old project (called Project Old) where you know you used BBMap 37.10. You just got back reviewer comments and they want you to include some alignment statistics. Unfortunately, you haven’t saved that information so you will have to rerun the alignment. Now, it is essential that you use the same version of BBMap that your results are based on, otherwise the alignment statistics will be misleading. Using Conda environments this becomes simple. You can just have a separate environment for your old project where you have an old version of BBMap without interfering with your new Project A where you want the latest version.

+
    +
  • Make a new environment for your old project:
  • +
+
conda create -n project_old -c bioconda bbmap=37.10
+
    +
  • List your environments (do you remember the command?).
  • +
  • Activate project_old and check the BBMap version (bbmap.sh --version).
  • +
  • Activate project_a again and check the BBMap version.
  • +
+
+
+

2.3 Removing packages

+

Now let’s try to remove an installed package from the active environment:

+
conda remove multiqc
+
    +
  • Run conda deactivate to exit your active environment.
  • +
  • Now, let’s remove an environment:
  • +
+
conda env remove -n project_old
+

After making a few different environments and installing a bunch of packages, Conda can take up some disk space. You can remove unnecessary files with the command:

+
conda clean -a
+

This will remove package tar-balls that are left from package installations, unused packages (i.e. those not present in any environments), and cached data.
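If you first want to see what would be removed without actually deleting anything, conda clean has a dry-run option (a cautious sketch, assuming a reasonably recent Conda version):

conda clean -a --dry-run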

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use conda install for installing packages on the fly.
  • +
  • How to create, activate and change between environments.
  • +
  • How to remove packages or environments and clean up.
  • +
+
+
+
+
+
+

3 Working with environments

+

We have up until now specified which Conda packages to install directly on the command line using the conda create and conda install commands. For working in projects this is not the recommended way. Instead, for increased control and reproducibility, it is better to use an environment file (in YAML format) that specifies the packages, versions and channels needed to create the environment for a project.

+

Throughout these tutorials we will use a case study where we analyse an RNA-seq experiment with the multi-resistant bacteria MRSA (see intro). You will now start to make a Conda YAML file for this MRSA project. The file will contain a list of the software and versions needed to execute the analysis code.

+

In this Conda tutorial, all code for the analysis is available in the script code/run_qc.sh. This code will download the raw FASTQ-files and subsequently run quality control on these using the FastQC software.

+
+

3.1 Working with environments

+

We will start by making a Conda YAML-file that contains the required packages to perform these two steps. Later in the course, you will update the Conda YAML-file with more packages, as the analysis workflow is expanded.

+
    +
  • Let’s get going! Make a YAML file called environment.yml looking like this, and save it in the current directory (which should be workshop-reproducible-research/tutorials/conda):
  • +
+
channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - fastqc=0.12.1
+
    +
  • Now, make a new Conda environment from the YAML file (note that here the command is conda env create as opposed to conda create that we used before):
  • +
+
conda env create -n project_mrsa -f environment.yml
+
+
+
+ +
+
+Tip +
+
+
+

You can also specify exactly which channel a package should come from inside the environment file, using the channel::package=version syntax.
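The same channel::package=version spec can, as far as we know, also be used directly on the command line, e.g.:

conda install bioconda::fastqc=0.12.1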

+
+
+
+
+
+ +
+
+Tip +
+
+
+

Instead of the -n flag you can use the -p flag to set the full path to where the Conda environment should be installed. In that way you can contain the Conda environment inside the project directory, which does make sense from a reproducibility perspective, and makes it easier to keep track of what environment belongs to what project. If you don’t specify -p the environment will be installed in the envs/ directory inside your Conda installation path.
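For example, to keep the environment inside the project directory you could do something along these lines (the paths are just an illustration):

conda env create -p ./envs/project_mrsa -f environment.yml
conda activate ./envs/project_mrsa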

+
+
+
    +
  • Activate the environment!

  • +
  • Now we can run the code for the MRSA project found in code/run_qc.sh, either by running bash code/run_qc.sh or by opening the run_qc.sh file and executing each line in the terminal one by one. Do this!

  • +
+

This should download the project FASTQ files and run FastQC on them (as mentioned above).

+
    +
  • Check your directory contents (ls -Rlh, or in your file browser). It should now have the following structure:
  • +
+
   conda/
+    |
+    |- code/
+    |   |- run_qc.sh
+    |
+    |- data/
+    |   |- SRR935090.fastq.gz
+    |   |- SRR935091.fastq.gz
+    |   |- SRR935092.fastq.gz
+    |
+    |- results/
+    |   |- fastqc/
+    |       |- SRR935090_fastqc.html
+    |       |- SRR935090_fastqc.zip
+    |       |- SRR935091_fastqc.html
+    |       |- SRR935091_fastqc.zip
+    |       |- SRR935092_fastqc.html
+    |       |- SRR935092_fastqc.zip
+    |
+    |- environment.yml
+

Note that all that was needed to carry out the analysis and generate these files and results was environment.yml (that we used to create a Conda environment with the required packages) and the analysis code in code/run_qc.sh.
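In other words, on a fresh copy of the project the whole analysis can, in principle, be reproduced with just:

conda env create -n project_mrsa -f environment.yml
conda activate project_mrsa
bash code/run_qc.sh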

+
+
+

3.2 Keeping track of dependencies

+

Projects can often be quite large and require lots of dependencies; it can feel daunting to try to capture all of that in a single Conda environment, especially when you consider potential incompatibilities that may arise. It can therefore be a good idea to start new projects with an environment file with each package you know that you will need to use, but without specifying exact versions (except for those packages where you know you need a specific version). This will install the latest compatible versions of all the specified software, making the start-up and installation part of new projects easier. You can then add the versions that were installed to your environment file afterwards, ensuring future reproducibility.

+

There is one command that can make this easier: conda env export. This allows you to export a list of the packages you’ve already installed, including their specific versions, meaning you can easily add them after the fact to your environment file. If you use the --no-builds flag, you’ll get a list of the packages minus their OS-specific build specifications, which is more useful for making the environment portable across systems. This way, you can start with an environment file with just the packages you need (without versions), which will install the most up-to-date versions possible, and then add the resulting versions back into the environment file using the export command!
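A possible workflow for this, sketched with made-up file names:

# start from a loose environment file and create the environment
conda env create -n project_a -f environment.yml
conda activate project_a
# export the resolved versions (without OS-specific build strings)
conda env export --no-builds > environment-resolved.yml
# copy the relevant pinned versions back into environment.yml by hand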

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to define our Conda environment using a YAML-file.
  • +
  • How to use conda env create to make a new environment from a YAML-file.
  • +
  • How to use conda env export to get a list of installed packages.
  • +
  • How to work in a project-like setting.
  • +
+
+
+
+
+
+

4 Extra material

+

The following extra material contains some more advanced things you can do with Conda and the command line in general, which is not part of the main course materials. All the essential skills are covered by the previous sections: the material here should be considered tips and tricks from people who use Conda as part of their daily work. You thus don’t need to use these things unless you want to, and you can even skip this part of the lesson if you like!

+
+

4.1 Configuration

+

The behaviour of your Conda installation can be changed using an optional configuration file .condarc. On a fresh Conda install no such file is included but it’s created in your home directory as ~/.condarc the first time you run conda config.

+

You can edit the .condarc file either using a text editor or by way of the conda config command. To list all config parameters and their settings run:

+
conda config --show
+

Similar to Conda environment files, the configuration file is in YAML syntax. This means that the config file is structured in the form of key:value pairs where the key is the name of the config parameter (e.g. auto_update_conda) and the value is the parameter setting (e.g. True).

+

Adding the name of a config parameter to conda config --show will show only that parameter, e.g. conda config --show channels.

+

You can change parameters with the --set, --add, --append and --remove flags to conda config.

+

If you for example want to enable the ‘Always yes’ behaviour which makes Conda automatically choose the yes option, such as when installing, you can run:

+
conda config --set always_yes True
+

To see details about a config parameter you can run conda config --describe parameter. Try running it on the channels parameter:

+
conda config --describe channels
+

In the beginning of this tutorial we added Conda channels to the .condarc file using conda config --add channels. To remove one of the channels from the configuration file you can run:

+
conda config --remove channels conda-forge
+

Check your .condarc file to see the change. To add the conda-forge channel back to the top of the channels simply run:

+
conda config --add channels conda-forge
+

To completely remove a parameter and all its values run:

+
conda config --remove-key parameter
+

For a list of Conda configuration parameters see the Conda configuration page.

+
+
+

4.2 Managing Python versions

+

With Conda environments it’s possible to keep several different versions of Python on your computer at the same time, and switching between these versions is very easy. However, a single Conda environment can only contain one version of Python.

+
+

4.2.1 Your current Python installation

+

The base environment has its own version of Python installed. When you open a terminal (after having installed Conda on your system) this base environment is activated by default (as evidenced by (base) prepended to your prompt). You can check what Python version is installed in this environment by running python --version. To see the exact path to the Python executable type which python.

+

In addition to this your computer may already have Python installed in a separate (system-wide) location outside of the Conda installation. To see if that is the case type conda deactivate until your prompt is not prepended with a Conda environment name. Then type which python. If a path was printed to the terminal (e.g. /usr/bin/python) that means some Python version is already installed in that location. Check what version it is by typing python --version.

+

Now activate the base environment again by typing conda activate (or the equivalent conda activate base) then check the Python installation path and version using which and python --version as above. See the difference? When you activate an environment your $PATH variable is updated so that when you call python (or any other program) the system first searches the directory of the currently active environment.
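The comparison described above boils down to something like this:

conda deactivate             # repeat until no environment name is shown in the prompt
which python && python --version
conda activate base
which python && python --version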

+
+
+

4.2.2 Different Python versions

+

When you create a new Conda environment you can choose to install a specific version of Python in that environment as well. As an example, create an environment containing Python version 3.5 by running:

+
conda create -n py35 python=3.5
+

Here we name the environment py35 but you can choose whatever name you want.

+

To activate the environment run:

+
conda activate py35
+

You now have a completely separate environment with its own Python version.

+

Let’s say you instead want an environment with Python version 2.7 installed. You may for instance want to run scripts or packages that were written for Python 2.x and are thus incompatible with Python 3.x. Simply create the new Conda environment with:

+
conda create -n py27 python=2.7
+

Activate this environment with:

+
conda activate py27
+

Now, switching between Python versions is as easy as typing conda activate py35 / conda activate py27.

+
+
+
+ +
+
+Note +
+
+
+

If you create an environment where none of the packages require Python, and you don’t explicitly install the python package then that new environment will use the Python version installed in your base environment.

+
+
+
+
+
+

4.3 Decorating your prompt

+

By default, the name of the currently activated environment is added to your command line prompt. This is a good thing, as it makes it easier to keep track of what environment and packages you have access to. The default implementation becomes an issue when using absolute paths for environments (i.e. specifying conda env create -p path/to/environment), though, as the entire path will be added to the prompt. This can take up a lot of unnecessary space on your screen, but can be solved in a number of ways.

+

The most straightforward way to solve this is to change the Conda configuration file, specifically the settings of the env_prompt configuration value which determines how Conda modifies your command line prompt. For more information about this setting you can run conda config --describe env_prompt and to see your current setting you can run conda config --show env_prompt.

+

By default env_prompt is set to ({default_env}) which modifies your prompt with the active environment name if it was installed using the -n flag or if the environment folder has a parent folder named envs/. Otherwise the full environment path (i.e. the ‘prefix’) is displayed.

+

If you instead set env_prompt to ({name}) Conda will modify your prompt with the folder name of the active environment. You can change the setting by running conda config --set env_prompt '({name}) '

+

If you wish to keep the ({default_env}) behaviour, or just don’t want to change your Conda config, an alternative is to keep Conda environment folders within a parent folder called envs/. This will make Conda only add the folder name of the Conda environment to your prompt when you activate it.

+

As an example, say you have a project called project_a with the project path ~/myprojects/project_a. You could then install the environment for project_a into a folder ~/myprojects/project_a/envs/project_a_environment. Activating the environment by pointing Conda to it (e.g. conda activate ~/myprojects/project_a/envs/project_a_environment) will only cause your prompt to be modified with project_a_environment.
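Using the example above, the commands could look like this (an environment.yml file is assumed to exist in the project):

conda env create -p ~/myprojects/project_a/envs/project_a_environment -f environment.yml
conda activate ~/myprojects/project_a/envs/project_a_environment
# the prompt now shows only (project_a_environment)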

+
+
+

4.4 Bash aliases for conda

+

Some programmers like to have aliases (i.e. shortcuts) for common commands. Two aliases that might be useful for you are alias coac='conda activate' and alias code='conda deactivate'. Don’t forget to add them to your ~/.bash_profile if you want to use them!
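Adding them could be as simple as this (assuming you use ~/.bash_profile):

echo "alias coac='conda activate'" >> ~/.bash_profile
echo "alias code='conda deactivate'" >> ~/.bash_profile
source ~/.bash_profile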

+
+
+

4.5 Rolling back to an earlier version of the environment

+

The history of the changes to an environment is automatically tracked. You can see revisions to an environment by using:

+
conda list --revisions
+

This shows each revision (numbered) and what was installed in it.

+

You can revert to a particular revision using:

+
conda install --revision 5
+
+
+

4.6 Mamba, the drop-in Conda replacement

+

There is another piece of software that is built on top of Conda as a drop-in replacement for it: Mamba. The reason for Mamba’s existence is that it used to have a better solver algorithm for the dependency tree than Conda did. These days, however, this algorithm is included in Conda as the default. There are still a few minor reasons you might want to use Mamba, however. The first is that Mamba re-implements Conda in C++, which runs slightly faster than the Python-based Conda. This only yields a minor speed increase compared to the dependency-tree algorithm, though, so don’t expect major differences in execution time between Conda and Mamba. Another reason is that Mamba colours its output, which is nice if you care about that sort of thing. If you installed Conda as described in the pre-course material you’ll, conveniently, already have installed Mamba as well!
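Since it is a drop-in replacement, using Mamba is mostly a matter of swapping the command name, e.g.:

mamba create -n project_a -c bioconda fastqc
mamba env create -n project_mrsa -f environment.yml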

+ + +
+
+ +
+ +
\ No newline at end of file
diff --git a/pages/containers.html b/pages/containers.html
new file mode 100644
index 00000000..38e81ac5
--- /dev/null
+++ b/pages/containers.html
@@ -0,0 +1,1230 @@
+
+ +
+ +
+
+
+

Controlling your environment with Containers

+

How to keep track of both your environment and operating system

+
+
+ + +
+ + +
+
Published
+
+

15-Oct-2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + + +
+

1 Introduction

+

Container-based technologies are designed to make it easier to create, deploy, and run applications by isolating them in self-contained software units (hence their name). The idea is to package software and/or code together with everything it needs (other packages it depends on, various environment settings, etc.) into one unit, i.e. a container. This way we can ensure that the software or code functions in exactly the same way regardless of where it’s executed. Containers are in many ways similar to virtual machines but more lightweight. Rather than starting up a whole new operating system, containers can use the same kernel (usually Linux) as the system that they’re running on. This makes them much faster and smaller compared to virtual machines. While this might sound a bit technical, actually using containers is quite smooth and very powerful.

+

Containers have also proven to be a very good solution for packaging, running and distributing scientific data analyses. Some applications of containers relevant for reproducible research are:

+
    +
  • When publishing, package your analyses in a container image and let it accompany the article. This way interested readers can reproduce your analysis at the push of a button.
  • +
  • Packaging your analysis in a container enables you to develop on e.g. your laptop and seamlessly move to cluster or cloud to run the actual analysis.
  • +
  • Say that you are collaborating on a project and you are using Mac while your collaborator is using Windows. You can then set up a container image specific for your project to ensure that you are working in an identical environment.
  • +
+

One of the largest and most widely used container-based technologies is Docker. Just as with Git, Docker was designed for software development but is rapidly becoming widely used in scientific research. Another container-based technology is Apptainer (and the related Singularity), which was developed to work well in computer cluster environments such as Uppmax. We will cover both Docker and Apptainer in this course, but the focus will be on the former (since that is the most widely used and runs on all three operating systems).

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to install Docker if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/containers.

+
+
+
+ +
+
+Dockage and storage +
+
+
+

Docker images tend to take up quite a lot of space. In order to do all the exercises in this tutorial you need to have ~10 GB available.

+
+
+
+
+

2 The basics

+

We’re almost ready to start, just one last note on nomenclature. You might have noticed that we sometimes refer to “Docker images” and sometimes to “Docker containers”. We use images to start containers, so a container is simply a running instance of an image. You can have an image containing, say, a certain Linux distribution, and then start multiple containers running that same OS.
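To make the distinction concrete, the two commands below would start two separate, independent containers from the very same ubuntu image (a trivial example; the image is pulled in the next section, but Docker will also fetch it on the fly):

docker run --rm ubuntu hostname
docker run --rm ubuntu hostname   # a second container, with a different hostname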

+
+
+
+ +
+
+Root privileges are required +
+
+
+

If you don’t have root privileges you have to prepend all Docker commands with sudo.

+
+
+
+

2.1 Downloading images

+

Docker containers typically run Linux, so let’s start by downloading an image containing Ubuntu (a popular Linux distribution that is based on only open-source tools) through the command line.

+
docker pull ubuntu:latest
+

You will notice that it downloads different layers with weird hashes as names. This represents a very fundamental property of Docker images that we’ll get back to in just a little while. The process should end with something along the lines of:

+
Status: Downloaded newer image for ubuntu:latest
+docker.io/library/ubuntu:latest
+

Let’s take a look at our new and growing collection of Docker images:

+
docker image ls
+

The Ubuntu image should show up in this list, with something looking like this:

+
REPOSITORY       TAG              IMAGE ID            CREATED             SIZE
+ubuntu           latest           d70eaf7277ea        3 weeks ago         72.9MB
+
+
+

2.2 Running containers

+

We can now start a container from the image we just downloaded. We can refer to the image either by “REPOSITORY:TAG” (“latest” is the default so we can omit it) or “IMAGE ID”. The syntax for docker run is docker run [OPTIONS] IMAGE [COMMAND] [ARG...]. To see the available options run docker run --help. The COMMAND part is any command that you want to run inside the container, it can be a script that you have written yourself, a command line tool or a complete workflow. The ARG part is where you put optional arguments that the command will use.

+

Let’s run uname -a to get some info about the operating system. In this case, uname is the COMMAND and -a the ARG. This command will display some general info about your system, and the -a argument tells uname to display all possible information.

+

First run it on your own system (use systeminfo if you are on Windows):

+
uname -a
+

This should print something like this to your command line:

+
Darwin liv433l.lan 15.6.0 Darwin Kernel Version 15.6.0: Mon Oct  2 22:20:08 PDT 2017; root:xnu-3248.71.4~1/RELEASE_X86_64 x86_64
+

Seems like I’m running the Darwin version of MacOS. Then run it in the Ubuntu Docker container:

+
docker run ubuntu uname -a
+

Here I get the following result:

+
Linux 24d063b5d877 5.4.39-linuxkit #1 SMP Fri May 8 23:03:06 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
+

And now I’m running on Linux! What happens is that we use the downloaded ubuntu image to run a container that has Ubuntu as the operating system, and we instruct Docker to execute uname -a to print the system info within that container. The output from the command is printed to the terminal.

+

Try the same thing with whoami instead of uname -a.

+
+
+

2.3 Running interactively

+

So, seems we can execute arbitrary commands on Linux. This looks useful, but maybe a bit limited. We can also get an interactive terminal with the flags -it.

+
docker run -it ubuntu
+

Your prompt should now look similar to:

+
root@1f339e929fa9:/#
+

You are now using a terminal inside a container running Ubuntu. Here you can do whatever; install, run, remove stuff. Anything you do will be isolated within the container and never affect your host system.

+

Now exit the container with exit.

+
+
+

2.4 Containers inside scripts

+

Okay, so Docker lets us work in any OS in a quite convenient way. That would probably be useful on its own, but Docker is much more powerful than that. For example, let’s look at the shell part of the index_genome rule in the Snakemake workflow for the MRSA case study:

+
shell:
+    """
+    bowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}
+    """
+

You may have seen that one can use containers through both Snakemake and Nextflow if you’ve gone through their tutorial’s extra material, but we can also use containers directly inside scripts in a very simple way. Let’s imagine we want to run the above command using containers instead. How would that look? It’s quite simple, really: first we find a container image that has bowtie2 installed, and then prepend the command with docker run <image>.

+

First of all we need to download the genome to index though, so run:

+
curl -o NCTC8325.fa.gz ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz
+gunzip -c NCTC8325.fa.gz > tempfile
+

This downloads and prepares the input for Bowtie2.

+

Now try running the following Bash code:

+
docker run -v $(pwd):/analysis quay.io/biocontainers/bowtie2:2.5.1--py39h3321a2d_0 bowtie2-build /analysis/tempfile /analysis/NCTC8325
+

Docker will automatically download the container image for Bowtie2 version 2.5.1 from the remote repository https://quay.io/repository/biocontainers/bowtie2 and subsequently run the command! This is the docker run [OPTIONS] IMAGE [COMMAND] [ARG...] syntax just like before. In this case quay.io/biocontainers/bowtie2:2.5.1--py39h3321a2d_0 is the IMAGE but instead of first downloading and then running it we point to its remote location directly, which will cause Docker to download it on the fly. The bowtie2-build part is the COMMAND followed by the ARG (the input tempfile and the output index)

+

The -v $(pwd):/analysis part is the OPTIONS which we use to mount the current directory inside the container in order to make the tempfile input available to Bowtie2. More on these so-called “Bind mounts” in Section 4 of this tutorial.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use docker pull for downloading remotely stored images
  • +
  • How to use docker image ls for getting information about the images we have on our system.
  • +
  • How to use docker run for starting a container from an image.
  • +
  • How to use the -it flag for running in interactive mode.
  • +
  • How to use Docker inside scripts.
  • +
+
+
+
+
+
+

3 Building images

+

In the previous section we downloaded a Docker image of Ubuntu and noticed that it was based on layers, each with a unique hash as id. An image in Docker is based on a number of read-only layers, where each layer contains the differences to the previous layers. If you’ve done the Git tutorial this might remind you of how a Git commit contains the difference to the previous commit. The great thing about this is that we can start from one base layer, say containing an operating system and some utility programs, and then generate many new images based on this, say 10 different project-specific images. This dramatically reduces the storage space requirements. For example, Bioconda (see the Conda tutorial) has one base image and then one individual layer for each of the more than 3000 packages available in Bioconda.
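You can inspect the layers of any image you have locally with docker history, e.g. for the Ubuntu image we pulled earlier:

docker history ubuntu:latest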

+

Docker provides a convenient way to describe how to go from a base image to the image we want by using a “Dockerfile”. This is a simple text file containing the instructions for how to generate each layer. Docker images are typically quite large, often several GBs, while Dockerfiles are small and serve as blueprints for the images. It is therefore good practice to have your Dockerfile in your project Git repository, since it allows other users to exactly replicate your project environment.

+

We will be looking at a Dockerfile called Dockerfile_slim that is located in your containers directory (where you should hopefully be standing already). We will now go through that file and discuss the different steps and what they do. After that we’ll build the image and test it out. Lastly, we’ll start from that image and make a new one to reproduce the results from the Conda tutorial.

+
+

3.1 Understanding Dockerfiles

+

Here are the first few lines of Dockerfile_slim. Each line in the Dockerfile will typically result in one layer in the resulting image. The format for Dockerfiles is INSTRUCTION arguments. A full specification of the format, together with best practices, can be found here.

+
FROM condaforge/miniforge3
+
+LABEL description = "Minimal image for the NBIS reproducible research course."
+MAINTAINER "John Sundh" john.sundh@scilifelab.se
+

Here we use the instructions FROM, LABEL and MAINTAINER. While LABEL and MAINTAINER are just metadata that can be used for organizing your various Docker components, the important one is FROM, which specifies the base image we want to start from. Because we want to use conda to install packages we will start from an image from the conda-forge community that has conda pre-installed. This image was in turn built using a Dockerfile as a blueprint and then uploaded to Dockerhub. The conda-forge community keeps the Dockerfile in a git repository and you can view the file here. You will see that it starts from an official Ubuntu image (check the first line with the FROM instruction), followed by code to install various packages including conda.

+
+
+
+ +
+
+There are many roads to Rome +
+
+
+

When it comes to choosing the best image to start from there are multiple routes you could take. Say you want to run RStudio in a Conda environment through a Jupyter notebook. You could then start from one of the rocker images for R, a Condaforge image, or a Jupyter image. Or you just start from one of the low-level official images and set up everything from scratch.

+
+
+

Let’s take a look at the next section of Dockerfile_slim.

+
# Use bash as shell
+SHELL ["/bin/bash", "--login", "-c"]
+
+# Set workdir
+WORKDIR /course
+
+# Set time zone
+ENV TZ="Europe/Stockholm"
+ENV DEBIAN_FRONTEND=noninteractive
+

SHELL simply sets which shell to use and WORKDIR determines the directory the container should start in. The ENV instruction is used to set environmental variables and here we use it to set the time zone by declaring a TZ variable. The DEBIAN_FRONTEND=noninteractive line means that we force the subsequent installation to not prompt us to set the time zone manually.

+

The next few lines introduce the important RUN instruction, which is used for executing shell commands:

+
# Install package for setting time zone
+RUN apt-get update && apt-get install -y tzdata && apt-get clean
+
+# Configure Conda
+RUN conda init bash && conda config --set channel_priority strict && \
+    conda config --append channels bioconda && \
+    conda config --append channels r && \
+    conda config --set subdir linux-64
+

The first RUN command installs the tzdata package for managing local time settings in the container. This may not always be required for your Dockerfile but it’s added here because some R packages used in the course require it.

+
+
+
+ +
+
+Note +
+
+
+

While installing things with apt-get inside Dockerfiles is relatively common practice, it’s important to note that this may affect reproducibility, since it’s not common to specify an exact version. The packages installed in this manner are, however, usually not important for the actual analyses performed, but rather help in the building of the container image itself. While not critical, it’s important to note this from a reproducibility perspective.

+
+
+

Next, we run conda init bash to initialize the bash shell inside the image, meaning we can use conda activate in containers that run from the image. In the same RUN statement we also configure the strict channel priority and add appropriate channels with conda config. You’ll probably recognize this from the pre-course-setup. The last part sets the somewhat obscure subdir config parameter pointing to the linux-64 architecture of conda channels.

+

As a general rule, you want each layer in an image to be a “logical unit”. For example, if you want to install a program the RUN command should both retrieve the program, install it and perform any necessary clean up. This is due to how layers work and how Docker decides what needs to be rerun between builds. More on this later.

+

Next up is:

+
# Open port for running Jupyter Notebook
+EXPOSE 8888
+
+# Start Bash shell by default
+CMD /bin/bash
+

EXPOSE opens up the port 8888, so that we can later run a Jupyter Notebook server on that port. CMD is an interesting instruction. It sets what a container should run when nothing else is specified, i.e. if you run docker run [OPTIONS] [IMAGE] without the additional [COMMAND] [ARG]. It can be used for example for printing some information on how to use the image or, as here, start a Bash shell for the user. If the purpose of your image is to accompany a publication then CMD could be to run the workflow that generates the paper figures from raw data, e.g. CMD snakemake -s Snakefile -c 1 generate_figures.

+
+
+

3.2 Building from Dockerfiles

+

Now we understand how a Dockerfile works. Constructing the image itself from the Dockerfile can be done as follows - try it out:

+
+
+
+ +
+
+Image platforms on newer Macs +
+
+
+

If your computer is a Mac with an Apple silicon chip (M1 or later), you may have to add --platform linux/x86_64 to the docker build command.

+
+
+
docker build -f Dockerfile_slim -t my_docker_image .
+

This should result in something similar to this:

+
 [+] Building 2.2s (7/7) FINISHED
+ => [internal] load build definition from Dockerfile_slim                                                                                                                                             0.0s
+ => => transferring dockerfile: 667B                                                                                                                                                                  0.0s
+ => [internal] load .dockerignore                                                                                                                                                                     0.0s
+ => => transferring context: 2B                                                                                                                                                                       0.0s
+ => [internal] load metadata for docker.io/condaforge/miniforge3:latest                                                                                                                               0.0s
+ => [1/3] FROM docker.io/condaforge/miniforge3                                                                                                                                                        0.0s
+ => CACHED [2/3] WORKDIR /course                                                                                                                                                                      0.0s
+ => [3/3] RUN conda init bash && conda config --set channel_priority strict &&     conda config --append channels bioconda &&     conda config --append channels r &&     conda config --set subdir   2.1s
+ => exporting to image                                                                                                                                                                                0.0s
+ => => exporting layers                                                                                                                                                                               0.0s
+ => => writing image sha256:53e6efeaa063eadf44c509c770d887af5e222151f08312e741aecc687e6e8981                                                                                                          0.0s
+ => => naming to docker.io/library/my_docker_image
+

Exactly how the output looks depends on which version of Docker you are using. The -f flag sets which Dockerfile to use and -t tags the image with a name. This name is how you will refer to the image later. Lastly, the . is the path to where the image should be built (. means the current directory). This has no real impact in this case, but matters if you want to import files. Validate with docker image ls that you can see your new image.

+
+
+

3.3 Creating your own Dockerfile

+

Now it’s time to make your own Dockerfile to reproduce the results from the Conda tutorial. If you haven’t done the tutorial, it boils down to creating a Conda environment file, setting up that environment, downloading three RNA-seq data files, and running FastQC on those files. We will later package and run the whole RNA-seq workflow in a Docker container, but for now we keep it simple to reduce the size and time required.

+

The Conda tutorial uses a shell script, run_qc.sh, for downloading and running the analysis. A copy of this file should also be available in your current directory. If we want to use the same script we need to include it in the image. A basic outline of what we need to do is:

+
    +
  1. Create a file called Dockerfile_conda.
  2. Start the image from the my_docker_image we just built.
  3. Install the package fastqc which is required for the analysis.
  4. Add the run_qc.sh script to the image.
  5. Set the default command of the image to run the run_qc.sh script.
+

We’ll now go through these steps in more detail. Try to add the corresponding code to Dockerfile_conda on your own, and if you get stuck you can click to reveal the solution below under “Click to show solution”.

+

Set image starting point

+

To set the starting point of the new image, use the FROM instruction and point to my_docker_image that we built in the previous Building from Dockerfiles step.

+

Install packages

+

Use the RUN instruction to install the package fastqc=0.11.9 with conda. Here there are several options available. For instance we could add an environment file, e.g. environment.yml from the Conda tutorial, and use conda env create to create an environment from that file. Or we could create an environment directly with conda create. We’ll try the latter option here, so add a line that will create an environment named project_mrsa containing the fastqc package, and also clean up packages and cache after installation. Use the -y flag to conda create to avoid the prompt that expects an interaction from the user.

+

In order to have the project_mrsa environment activated upon start-up we need to add two more lines to the Dockerfile. First we need to use a RUN instruction to run echo "source activate project_mrsa" >> ~/.bashrc, and then we need to use the ENV instruction to set the $PATH variable inside the image to /opt/conda/envs/project_mrsa/bin:$PATH.

+

Add the analysis script

+

Use the COPY instruction to add run_qc.sh to the image. The syntax is COPY SOURCE TARGET. In this case SOURCE is the run_qc.sh script and TARGET is a path inside the image; for simplicity it can be specified with ./.

+

Set default command

+

Use the CMD instruction to set the default command for the image to bash run_qc.sh.

+
+ +
+
+
FROM my_docker_image
+
+RUN conda create -y -n project_mrsa -c bioconda fastqc=0.11.9 && conda clean -a
+
+RUN echo "source activate project_mrsa" >> ~/.bashrc
+
+ENV PATH=/opt/conda/envs/project_mrsa/bin:$PATH
+
+COPY run_qc.sh .
+
+CMD bash run_qc.sh
+
+
+
+

Build the image and tag it my_docker_conda (remember to add --platform linux/x86_64 to the build command if you are using a Mac with the Apple chip).

+
docker build -t my_docker_conda -f Dockerfile_conda .
+

Verify that the image was built using docker image ls.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How the keywords FROM, LABEL, MAINTAINER, RUN, ENV, SHELL, WORKDIR, and CMD can be used when writing a Dockerfile.
  • +
  • How to use docker build to construct and tag an image from a Dockerfile.
  • +
  • How to create your own Dockerfile.
  • +
+
+
+
+
+
+

4 Managing containers

+

When you start a container with docker run it is given a unique id that you can use for interacting with the container. Let’s try to run a container from the image we just created:

+
docker run my_docker_conda
+

If everything worked run_qc.sh is executed and will first download and then analyse the three samples. Once it’s finished you can list all containers, including those that have exited.

+
docker container ls --all
+

This should show information about the container that we just ran. Similar to:

+
CONTAINER ID   IMAGE            COMMAND                  CREATED         STATUS          PORTS      NAMES
+b6f7790462c4   my_docker_conda   "tini -- /bin/bash -…"  3 minutes ago   Up 24 seconds   8888/tcp   sad_maxwell
+

If we run docker run without any flags, your local terminal is attached to the container. This enables you to see the output of run_qc.sh, but also prevents you from doing anything else in the meantime. We can start a container in detached mode with the -d flag. Try this out and run docker container ls to validate that the container is running.
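Trying out detached mode could look like this:

docker run -d my_docker_conda
docker container ls    # the container should show up as running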

+

By default, Docker keeps containers after they have exited. This can be convenient for debugging or if you want to look at logs, but it also consumes huge amounts of disk space. It’s therefore a good idea to always run with --rm, which will remove the container once it has exited.
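If you already have a pile of exited containers lying around, they can be cleaned up in one go (this removes all stopped containers, so use with care):

docker container prune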

+

If we want to enter a running container, there are two related commands we can use, docker attach and docker exec. docker attach will attach local standard input, output, and error streams to a running container. This can be useful if your terminal closed down for some reason or if you started a terminal in detached mode and changed your mind. docker exec can be used to execute any command in a running container. It’s typically used to peek at what is happening by opening up a new shell. Here we start the container in detached mode and then start a new interactive shell so that we can see what happens. If you use ls inside the container you can see how the script generates files in the data and results directories. Note that you will be thrown out when the container exits, so you have to be quick.

+
docker run -d --rm --name my_container my_docker_conda
+docker exec -it my_container /bin/bash
+
+

4.1 Bind mounts

+

There are obviously some advantages to isolating and running your data analysis in containers, but at some point you need to be able to interact with the rest of the host system (e.g. your laptop) to actually deliver the results. This is done via bind mounts. When you use a bind mount, a file or directory on the host machine is mounted into a container. That way, when the container generates a file in such a directory it will appear in the mounted directory on your host system.

+
+
+
+ +
+
+Tip +
+
+
+

Docker also has a more advanced way of data storage called volumes. Volumes provide added flexibility and are independent of the host machine’s file system having a specific directory structure available. They are particularly useful when you want to share data between containers.
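A minimal sketch of using a named volume instead of a bind mount (the volume name is arbitrary):

docker volume create course_data
docker run --rm -v course_data:/course/data my_docker_conda
docker volume ls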

+
+
+

Say that we are interested in getting the resulting HTML reports from FastQC out of the container. We can do this by mounting a directory called, say, fastqc_results in your current directory to the /course/results/fastqc directory in the container. Try this out by running:

+
docker run --rm -v $(pwd)/fastqc_results:/course/results/fastqc my_docker_conda
+

Here the -v flag to docker run specifies the bind mount in the form of directory/on/your/computer:/directory/inside/container. $(pwd) simply evaluates to the working directory on your computer.

+

Once the container finishes validate that it worked by opening one of the html reports under fastqc_results/.

+

We can also use bind mounts for getting files into the container rather than out. We’ve mainly been discussing Docker in the context of packaging an analysis pipeline to allow someone else to reproduce its outcome. Another application is as a kind of very powerful environment manager, similarly to how we’ve used Conda before. If you’ve organized your work into projects, then you can mount the whole project directory in a container and use the container as the terminal for running stuff while still using your normal OS for editing files and so on. Let’s try this out by mounting our current directory and start an interactive terminal. Note that this will override the CMD command, so we won’t start the analysis automatically when we start the container.

+
docker run -it --rm -v $(pwd):/course/ my_docker_conda /bin/bash
+

If you run ls you will see that all the files in the containers/ directory are there.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use docker run for starting a container and how the flags -d and --rm work.
  • +
  • How to use docker container ls for displaying information about the containers.
  • +
  • How to use docker attach and docker exec to interact with running containers.
  • +
  • How to use bind mounts to share data between the container and the host system.
  • +
+
+
+
+
+
+

5 Sharing images

+

There would be little point in going through all the trouble of making your analyses reproducible if you can’t distribute them to others. Luckily, sharing Docker containers is extremely easy, and can be done in several ways. One of the more common ways to share Docker images is through container registries and repositories.

+

For example, a Docker registry is a service that stores Docker images, which could be hosted by a third party, publicly or privately. One of the most common registries is Docker Hub, which is a registry hosted by Docker itself. A repository, on the other hand, is a collection of container images with the same name but different tags (i.e. versions), for example ubuntu:latest or ubuntu:20.04. Repositories are stored in registries.
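Tags are what you use to pick a specific image out of a repository, e.g.:

docker pull ubuntu:20.04    # a specific version
docker pull ubuntu:latest   # whatever 'latest' currently points to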

+
+
+
+ +
+
+Note +
+
+
+

Remember that we now have some clashing nomenclature between Git repositories (which we covered in the Git tutorial) and container repositories, so be aware of which one you’re talking about!

+
+
+

There are many registries out there, but here are some that might be of interest to you who are taking this course:

+ +

The most common registry is probably Docker Hub, which lets you host unlimited public images and one private image for free (beyond that they charge a small fee). The GitHub Container Registry is also quite handy if you’re already using GitHub. Let’s see how it’s done using Docker Hub!

+
    +
  1. Register for an account on Docker Hub.
  2. Use docker login -u your_dockerhub_id to login to the Docker Hub registry. Or use the Sign in button in Docker Desktop.
  3. When you build an image, tag it with -t your_dockerhub_id/image_name, rather than just image_name.
  4. Once the image has been built, upload it to Docker Hub with docker push your_dockerhub_id/image_name.
  5. If another user runs docker run your_dockerhub_id/image_name the image will automatically be retrieved from Docker Hub. You can use docker pull for downloading without running.
+

If you want to refer to a Docker image in for example a publication, it’s very important that it’s the correct version of the image. This is handled via the ‘tags’ (e.g. docker build -t your_dockerhub_id/image_name:tag_name) that we introduced in Containers 2: The basics and used when building images in Containers 3: Building images.
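Putting the steps above together, sharing the Conda image from before might look something like this (your_dockerhub_id is a placeholder):

docker login -u your_dockerhub_id
docker build -t your_dockerhub_id/my_docker_conda:1.0 -f Dockerfile_conda .
docker push your_dockerhub_id/my_docker_conda:1.0
# anyone can now run it with:
docker run your_dockerhub_id/my_docker_conda:1.0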

+
+
+
+ +
+
+Tip +
+
+
+

On Docker Hub it is also possible to link to your Bitbucket or GitHub account and select repositories from which you want to automatically build and distribute Docker images. The Docker Hub servers will then build an image from the Dockerfile in your Git repository and make it available for download using docker pull. That way, you don’t have to bother manually building and pushing using docker push. The GitHub repository for this course is linked to Docker Hub and the Docker images are built automatically from Dockerfile and Dockerfile_slim, triggered by changes made to the GitHub repository. You can take a look at the course on Docker Hub here.

+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How container registries and repositories work
  • +
  • How to use Docker Hub to share Docker images
  • +
+
+
+
+
+

6 Packaging the case study

+

During these tutorials we have been working on a case study about the multi-resistant bacteria MRSA. Here we will build and run a Docker container that contains all the work we’ve done so far.

+
    +
  • We’ve set up a GitHub repository for version control and for hosting our project.
  • +
  • We’ve defined a Conda environment that specifies the packages we’re depending on in the project.
  • +
  • We’ve constructed a Snakemake workflow that performs the data analysis and keeps track of files and parameters.
  • +
  • We’ve written a Quarto document that takes the results from the Snakemake workflow and summarizes them in a report.
  • +
+

The workshop-reproducible-research/tutorials/containers directory contains the final versions of all the files we’ve generated in the other tutorials: environment.yml, Snakefile, config.yml and code/supplementary_material.qmd. The only difference compared to the other tutorials is that we have also included the rendering of the Supplementary Material HTML file into the Snakemake workflow as the rule make_supplementary. Running all of these steps will take some time to execute (around 20 minutes or so), in particular if you’re on a slow internet connection.

+

Now take a look at Dockerfile. Everything should look quite familiar to you, since it’s basically the same steps as in the image we constructed in the Building images section, although with some small modifications. The main difference is that we add the project files needed for executing the workflow (mentioned in the previous paragraph), and install the conda packages using environment.yml. If you look at the CMD command you can see that it will run the whole Snakemake workflow by default.
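To give a rough idea of the structure being described, a minimal sketch of such a Dockerfile could look something like the following (this is not the actual course Dockerfile; the base image, paths and cleanup step are assumptions, and only the default command is taken from what’s described later in this tutorial):

FROM condaforge/miniforge3

# Add the project files needed to execute the workflow
WORKDIR /course
COPY environment.yml Snakefile config.yml ./
COPY code/ ./code/

# Install the Conda packages listed in environment.yml
RUN conda env update -n base -f environment.yml && conda clean --all --yes

# Run the whole Snakemake workflow by default
CMD ["snakemake", "-rp", "-c", "1", "--configfile", "config.yml"]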

+

Now run docker build as before, tag the image with my_docker_project (remember the --platform linux/x86_64 flag if you’re on a new Mac with the Apple chip):

+
docker build -t my_docker_project -f Dockerfile .
+

Go get a coffee while the image builds (or you could use docker pull nbisweden/workshop-reproducible-research which will download the same image).

+

Validate with docker image ls. Now all that remains is to run the whole thing with docker run. We just want to get the results, so mount the directory /course/results/ to, say, results/ in your current directory. Click below to see how to write the command.

+
+ +
+
+

If building your own image:

+
docker run -v $(pwd)/results:/course/results my_docker_project
+

If you pulled the image from DockerHub:

+
docker run -v $(pwd)/results:/course/results nbisweden/workshop-reproducible-research
+
+
+
+

Well done! You now have an image that allows anyone to exactly reproduce your analysis workflow (if you first docker push to Dockerhub that is).

+
+
+
+ +
+
+Tip +
+
+
+

If you’ve done the Jupyter tutorial, you know that Jupyter Notebook runs as a web server. This makes it very well suited for running in a Docker container, since we can just expose the port Jupyter Notebook uses and redirect it to one of our own. You can then work with the notebooks in your browser just as you’ve done before, while it’s actually running in the container. This means you could package your data, scripts and environment in a Docker image that also runs a Jupyter Notebook server. If you make this image available, say on Dockerhub, other researchers could then download it and interact with your data/code via the fancy interactive Jupyter notebooks that you have prepared for them. We haven’t made any fancy notebooks for you, but we have set up a Jupyter Notebook server. Try it out if you want to (replace the image name with your version if you’ve built it yourself):

+
docker run -it nbisweden/workshop-reproducible-research jupyter notebook --allow-root --no-browser
+
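To actually reach the notebook server from the browser on your host you also need to publish the port that Jupyter listens on. A hedged variant of the command above could look like this (the port number and the --ip flag are additions not shown in the original command):

docker run -it -p 8888:8888 nbisweden/workshop-reproducible-research \
    jupyter notebook --allow-root --no-browser --ip=0.0.0.0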
+
+
+
+

7 Apptainer

+

Apptainer is a container software alternative to Docker. It was originally developed as Singularity by researchers at Lawrence Berkeley National Laboratory (read more about this below) with focus on security, scientific software, and HPC clusters. One of the ways in which Apptainer is more suitable for HPC is that it very actively restricts permissions so that you do not gain access to additional resources while inside the container. Apptainer also, unlike Docker, stores images as single files using the Singularity Image Format (SIF). A SIF file is self-contained and can be moved around and shared like any other file, which also makes it easy to work with on an HPC cluster.

+
+
+
+ +
+
+Apptainer and Singularity +
+
+
+

The open source Singularity project was renamed to Apptainer in 2021. The company Sylabs still keeps its commercial branch of the project under the Singularity name, and offers a free ‘Community Edition’ version. The name change was done in order to clarify the distinction between the open source project and the various commercial versions. At the moment there is virtually no difference to you as a user whether you use Singularity or Apptainer, but eventually it’s very likely that the two will diverge.

+
+
+

While it is possible to define and build Apptainer images from scratch, in a manner similar to what you’ve already learned for Docker, this is not something we will cover here (but feel free to read more about this in e.g. the Apptainer docs).

+

The reasons for not covering Apptainer more in-depth are varied, but it basically boils down to it being more or less Linux-only, unless you use Virtual Machines (VMs). Even then you’ll run into issues of incompatibility of various kinds, and these issues are further compounded if you’re on one of the new ARM64 Macs. You also need root (admin) access in order to actually build Apptainer images regardless of platform, meaning that you can’t build them on e.g. Uppmax, even though Apptainer is already installed there. You can, however, use the --remote flag, which runs the build on Apptainer’s own servers. In practice this often doesn’t work, though, since most scientists work in private Git repositories so that their research and code are not publicly available, while the --remote flag requires that e.g. the environment.yml file is publicly accessible.

+

There are very good reasons to use Apptainer, however, the major one being that you aren’t allowed to use Docker on most HPC systems! One of the nicer features of Apptainer is that it can convert Docker images directly for use within Apptainer, which is highly useful for cases when you have already built your Docker image or are using a remotely available image stored on e.g. DockerHub. For a lot of scientific work based in R and/or Python, however, it is most often the case that you build your own images, since you have a complex dependency tree of software packages not readily available in existing images. So, we now have another problem for building our own images:

+
  1. Only Apptainer is allowed on HPC systems, but you can’t build images there due to not having root access.
  2. You can build Apptainer images locally and transfer them to HPCs, but this is problematic unless you’re running Linux natively.
+

Seems like a “catch 22”-problem, right? There are certainly workarounds (some of which we have already mentioned) but most are roundabout or difficult to get working for all use-cases. Funnily enough, there’s a simple solution: run Apptainer locally from inside a Docker container! Conceptually very meta, yes, but it works very well in practice. What we are basically advocating for is that you stick with Docker for most of your container-based work, but convert your Docker images using Apptainer-in-Docker whenever you need to work on an HPC. This is of course not applicable to Linux users, or to those of you who are fine with working through VMs and managing any issues that arise from doing so.

+
+
+
+ +
+
+Summary +
+
+
+

Apptainer is a great piece of software that is easiest to use if you’re working on a Linux environment. Docker is, however, easier to use from a cross-platform standpoint and covers all use-cases except running on HPCs. Running on HPCs can be done by converting existing Docker images at runtime, while building images for use on HPCs can be done using local Docker images and Apptainer-in-Docker.

+
+
+
+

7.1 Apptainer-in-Docker

+

By creating a bare-bones, Linux-based Docker image with Apptainer you can build Apptainer images locally on non-Linux operating systems. There is already a good image set up for just this, and it is defined in this GitHub repository. Looking at the instructions there we can see that we need to do the following:

+
docker run \
+    --rm \
+    -v /var/run/docker.sock:/var/run/docker.sock \
+    -v $(pwd):/work \
+    kaczmarj/apptainer \
+    build <IMAGE>.sif docker-daemon://<IMAGE>:<TAG>
+

You already know about docker run, the --rm flag and bind mounts using -v. The /var/run/docker.sock part is the Unix socket that the Docker daemon listens to by default; mounting it is what lets Apptainer access the local Docker image we want to convert to a SIF file. The kaczmarj/apptainer part after the bind mounts is the image location hosted at DockerHub, while the last line is the Apptainer command that actually does the conversion. All we need to do is to replace the <IMAGE> part with the Docker image we want to convert, e.g. my_docker_image.

+
  • Replace <IMAGE> and <TAG> with one of your locally available Docker images and one of its tags and run the command - remember that you can use docker image ls to check what images you have available.
+

In the end you’ll have a SIF file (e.g. my_docker_image.sif) that you can transfer to an HPC such as Uppmax and run whatever analyses you need. If you want to be able to do this without having to remember all the code you can check out this script.
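As a sketch of what that could look like in practice (the host name and target directory below are placeholders for your own HPC account and project):

# Copy the SIF file to the cluster (placeholder host and path)
scp my_docker_image.sif your_username@rackham.uppmax.uu.se:/proj/your_project/

# ... and then, logged in on the cluster:
apptainer run my_docker_image.sif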

+
+
+

7.2 Running Apptainer

+

The following exercises assume that you have a login to the Uppmax HPC cluster in Uppsala, but will also work for any other system that has Apptainer installed - like if you managed to install Apptainer on your local system or have access to some other HPC cluster. Let’s try to convert the Docker image for this course directly from DockerHub:

+
apptainer pull mrsa_proj.sif docker://nbisweden/workshop-reproducible-research
+

This should result in a SIF file called mrsa_proj.sif.

+

In the Docker image we included the code needed for the workflow in the /course directory of the image. These files are of course also available in the Apptainer image. However, an Apptainer image is read-only. This will be a problem if we try to run the workflow within the /course directory, since the workflow will produce files and Snakemake will create a .snakemake directory. Instead, we need to provide the files externally from our host system and simply use the Apptainer image as the environment to execute the workflow in (i.e. all the software and dependencies).

+

In your current working directory (workshop-reproducible-research/tutorials/containers/) the vital MRSA project files are already available (Snakefile, config.yml and code/supplementary_material.qmd). Since Apptainer bind mounts the current working directory we can simply execute the workflow and generate the output files using:

+
apptainer run mrsa_proj.sif
+

This executes the default run command, which is snakemake -rp -c 1 --configfile config.yml (as defined in the original Dockerfile). Once completed you should see a bunch of directories and files generated in your current working directory, including the results/ directory containing the final HTML report.
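If you want to run something other than the default command you can use apptainer exec instead; for example, to run the same workflow on four cores (a minimal sketch, assuming your allocation actually gives you four cores):

apptainer exec mrsa_proj.sif snakemake -rp -c 4 --configfile config.yml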

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • How to build an Apptainer image using Apptainer inside Docker.
  • How to convert Docker images to Apptainer images.
  • How to run Apptainer images.
+
+
+
+
+
+

8 Extra material

+

Containers can be large and complicated, but once you start using them regularly you’ll find that you begin to understand these complexities. There are lots of different things you can do with images and containers in general, especially when it comes to optimising build time or final image size. Here are some small tips and tricks to take inspiration from!

+

If you want to read more about containers in general you can check out these resources:

+
  • A “Get started with Docker” guide at the Docker website.
  • An early paper on the subject of using Docker for reproducible research.
+
+

8.1 Building for multiple platforms

+

With the newer ARM64 architectures introduced by Apple one often runs into the problem of not having an architecture-native image to run with. This is sometimes okay since the Rosetta2 software can emulate the old AMD64 architecture on newer ARM64 computers, but results in a performance hit. One could just build for ARM64 using --platform=linux/arm64 instead, but then somebody who doesn’t have the new architecture can’t run it. There is a way around this, however: multi-platform builds. We can build for multiple platforms at the same time and push those to e.g. DockerHub and anybody using those images will automatically pull the one appropriate for their computer. Here’s how to do it:

+
  • Start by checking the available builders using docker buildx ls.
+

You should only see the default builder, which does not have access to multi-platform builds. Let’s create a new builder that does have access to it:

+
  • Run the following: docker buildx create --name mybuilder --driver docker-container --bootstrap.
  • Switch to using the new builder with docker buildx use mybuilder and check that it worked with docker buildx ls.
+

All that’s needed now is to build and push the images! The following command assumes that you have an account with <username> at DockerHub and you’re pushing the <image> image:

+
docker buildx build --platform linux/amd64,linux/arm64 -t <username>/<image>:latest --push .
+
  • Execute the above command with your username and your image.
+

That’s it! Now anybody who does e.g. docker pull <username>/<image> will get an image appropriate for their architecture whether they are on AMD64 or ARM64!
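If you want to double-check that both platforms really ended up in the pushed image you can, for example, inspect its manifest with the buildx tooling (same placeholders as above):

docker buildx imagetools inspect <username>/<image>:latest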

+
+
+
+ +
+
+An alias to buildx +
+
+
+

You can type docker buildx install to make docker build an alias for docker buildx, allowing you to run multi-platform builds with docker build directly. Use docker buildx uninstall to remove this alias.

+
+
+ + +
+
+ +
+ +
[New figure files added in this commit, contents omitted: pages/images/dag_mrsa.svg, pages/images/jobgraph.svg, pages/images/rulegraph.svg, pages/images/rulegraph_complex.svg and pages/images/rulegraph_mrsa.svg (Snakemake rule graphs and job DAGs), plus the screenshots pages/images/jupyter_basic_update.png, jupyter_dashboard.png, jupyter_empty_nb.png, jupyter_widget.png and jupyterlab_dashboard.png.]

diff --git a/pages/jupyter.html b/pages/jupyter.html
new file mode 100644
+
+ +
+ +
+
+
+

Working with Jupyter

+

How to generate reproducible reports and computational notebooks

+
+
+ + +
+ + +
+
Published
+
+

15-Oct-2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + + +
+

1 Introduction

+

The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain code, equations, visualizations and text. The functionality is partly overlapping with Quarto (see the tutorial), in that they both use markdown and code chunks to generate reports that integrate results of computations with the code that generated them. Jupyter Notebook comes from the Python community while Quarto was developed by Posit (who also created R Markdown and RStudio), but you could use most common programming languages in either alternative. In practice though, it’s quite common that R developers use Jupyter but probably not very common that Python developers use RStudio. Some reasons to use Jupyter include:

+
  • Python is lacking a really good IDE for doing exploratory scientific data analysis, like RStudio or Matlab. Some people use Jupyter simply as an alternative for that.
  • The Jupyter Project community is large and dynamic, and there are lots of tools for sharing, displaying or interacting with notebooks.
  • An early ambition with Jupyter notebooks (and its predecessor IPython notebooks) was to be analogous to the lab notebook used in a wet lab. It would allow the data scientist to document his or her day-to-day work and interweave results, ideas, and hypotheses with the code. From a reproducibility perspective, this is one of the main advantages.
  • Jupyter notebooks can be used, just like Quarto, to provide a tighter connection between your data and your results by integrating results of computations with the code that generated them. They can also do this in an interactive way that makes them very appealing for sharing with others.
+

As always, the best way is to try it out yourself and decide what to use it for!

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. Then open up a terminal and go to workshop-reproducible-research/tutorials/jupyter and activate your jupyter-env Conda environment.

+
+
+
+ +
+
+A note on nomenclature +
+
+
+
  • Jupyter: a project to develop open-source software, open-standards, and services for interactive computing across dozens of programming languages. Lives at jupyter.org.
  • Jupyter Notebook: A web application that you use for creating and managing notebooks. One of the outputs of the Jupyter project.
  • Jupyter lab: A more powerful and feature-rich interface that also includes a terminal, debugger, tabs etc.
  • Jupyter notebook: The actual .ipynb file that constitutes your notebook.
+
+
+
+
+

2 The basics

+

One thing that sets Jupyter Notebook apart from what you might be used to is that it’s a web application, i.e. you edit and run your code from your browser. But first you have to start the Jupyter Notebook server. At this point you may either try the classic notebook interface by running:

+
jupyter notebook --allow-root
+

Or give the more feature-rich Jupyter lab interface a try by running:

+
jupyter lab --allow-root
+

Whichever interface you choose you should see something similar to this printed to your terminal:

+
[I 18:02:26.722 NotebookApp] Serving notebooks from local directory: /Users/john/workshop-reproducible-research/tutorials/jupyter
+[I 18:02:26.723 NotebookApp] 0 active kernels
+[I 18:02:26.723 NotebookApp] The Jupyter Notebook is running at:
+[I 18:02:26.723 NotebookApp] http://localhost:8888/?token=e03f10ccb40efc3c6154358593c410a139b76acf2cae000
+[I 18:02:26.723 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
+[C 18:02:26.724 NotebookApp]
+
+    Copy/paste this URL into your browser when you connect for the first time,
+    to login with a token:
+        http://localhost:8888/?token=e03f10ccb40efc3c6154358593c410a139b76acf2cae785c
+[I 18:02:27.209 NotebookApp] Accepting one-time-token-authenticated connection from ::1
+
+
+
+ +
+
+A note for Windows users +
+
+
+

If you see the error message Start : This command cannot be run due to the error: The system cannot find the file specified. ... then try starting Jupyter with jupyter notebook --no-browser then copy the URL given into the browser directly.

+
+
+
+
+
+ +
+
+Jupyter notebook versions +
+
+
+

Depending on what version of the notebook conda package you have installed, the interface may look slightly different. The screenshots in this tutorial are from version 7, an update which has brought the ‘classic’ Jupyter notebook closer to the Jupyter lab interface. Read more about this update at the Jupyter homepage.

+
+
+

The Jupyter Notebook/Lab interface probably opened up a web browser for you automatically, otherwise go to the address specified in the message in the terminal. Note that the server is running locally (as http://localhost:8888) so this does not require that you have an active internet connection. Also note that it says:

+
Serving notebooks from local directory: </some/local/path/workshop-reproducible-research/tutorials/jupyter>
+

Everything you do in your Notebook session will be stored in this directory, so you won’t lose any work if you shut down the server.

+
+

+
+

What you’re looking at is the Notebook dashboard. This is where you manage your files, notebooks, and kernels. The Files tab shows the files in your directory. The Running tab keeps track of all your processes.

+

The Jupyter lab dashboard should look something like this:

+
+

+
+

Let’s start by creating an empty notebook. You can do this by selecting the Files tab and clicking New > Notebook. When the notebook opens, select the suggested Python 3 kernel from the drop-down menu.

+

This will open up a new tab or window looking like this:

+
+

+
+

Start by giving your notebook a name by clicking on the text “Untitled” at the top of the page. Enter “jupyter-tutorial.ipynb”.

+

Note that for most of this tutorial we will describe how you work in the actual notebook and not devote a lot of time to the extra features available in the Jupyter lab interface.

+
+
+
+ +
+
+Tip +
+
+
+

If you want to start Jupyter Notebooks on a cluster that you SSH to (e.g. Uppmax) see the section in the Extra material

+
+
+

Jupyter notebooks are made up of cells, and you are currently standing in the first cell in your notebook. Your cursor should be blinking in this cell, indicating that you are in “Edit mode” meaning that you can type text in the cell. Pressing the Esc key puts you in “Command mode” which allows you to manipulate the notebook as a whole, more on this later.

+

Cells in Jupyter notebooks can be of two types: markdown or code.

+
  • Markdown:
+

These cells contain static material such as captions, text, lists, images and so on. You express this using Markdown, which is a lightweight markup language. Markdown documents can then be converted to other formats for viewing (the document you’re reading now is written in Markdown and then converted to HTML). The format is discussed in a little more detail in the Quarto tutorial. Jupyter Notebook uses a dialect of Markdown called GitHub Flavoured Markdown, which is described here.

+
  • Code:
+

These are the cells that actually do something, just as code chunks do in Quarto/R Markdown. You can write code in dozens of languages and do all kinds of clever tricks. You then run the code cell and any output the code generates, such as text or figures, will be displayed beneath the cell. We will get back to this in much more detail, but for now it’s enough to understand that code cells are for executing code that is interpreted by a kernel (in this case the Python version in your Conda environment).

+

Before we continue, here are some shortcuts that can be useful. Note that they are only applicable when in “Command mode”. Most of them are also available from the menus. You can also view this list of shortcuts from the Help menu under “Show Keyboard Shortcuts”.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Shortcut        Effect
enter           Enter Edit mode
escape          Enter Command mode
ctrl-enter      Run the cell
shift-enter     Run the cell and select the cell below
alt-enter       Run the cell and insert a new cell below
s               Save the notebook
tab             For code completion or indentation
m, y            Toggle between Markdown and Code cells
d-d             Delete a cell
a               Insert cells above current cell
b               Insert cells below current cell
x               Cut currently selected cells
v               Paste cell below
o               Toggle output of current cell
+
+
+

3 Writing markdown

+

Let’s use our first cell to create a header. Change the format from Code to Markdown using the drop-down list in the Notebook Toolbar, or by pressing the m key when in command mode. Double click on the cell, or hit enter to enter editing mode and input “# My notebook” (“#” is used in Markdown for header 1). Run the cell with ctrl-enter (cmd-enter on Mac).

+

Markdown is a simple way to structure your notebook into sections with descriptive notes, lists, links, images etc.

+

Below are some examples of what you can do in markdown. Paste all or parts of it into one or more cells in your notebook to see how it renders. Make sure you set the cell type to Markdown.

+
## Introduction
+In this notebook I will try out some of the **fantastic** concepts of Jupyter
+Notebooks.
+
+## Markdown basics
+Examples of text attributes are:
+
+- *italics*
+- **bold**
+- `monospace`
+
+Sections can be separated by horizontal lines.
+
+---
+
+Blockquotes can be added, for instance to insert a Monty Python quote:
+
+> Spam!
+> Spam!
+> Spam!
+> Spam!
+
+See [here](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Working%20With%20Markdown%20Cells.html) for more information.
+
+
+

4 Writing code

+

Now let’s write some code! Since we chose a Python kernel, Python would be the native language to run in a cell. Enter this code in the second cell and run it:

+
print("Hello world!")
+

Note how the output is directly displayed below the cell. This interactive way of working is one of the things that sets Jupyter Notebook apart from RStudio and Quarto. In RStudio/Quarto, documents are typically rendered top-to-bottom in one run, while you work in a Jupyter notebook in a different way. This requires some special attention when it comes to reproducibility, which we will get back to in the reproducibility section.

+

What is a Jupyter notebook? Let’s take a closer look at the notebook we’re currently working in. Jupyter Notebooks are autosaved every minute or so, so you will already have it available. We can be a little meta and do this from within the notebook itself, by running some shell commands in a code cell. This very handy functionality is possible by prepending the command with !. Try adding !ls to a code cell and run it. This will list the files in the current directory.

+

Aha, we have a new file called jupyter-tutorial.ipynb! This is our notebook. Look at the first ten lines of the file by using !head jupyter-tutorial.ipynb. Seems like it’s just a plain old JSON file. Since it’s a text file it’s suitable for version control with for example Git. There are however some special considerations to take into account for Notebooks which we will cover in the reproducibility section of this tutorial.

+

Variables defined in cells become variables in the global namespace. You can therefore share information between cells. Try to define a function or variable in one cell and use it in the next. For example, add the following to a new cell and run it:

+
def print_me(str):
+    print(str)
+

Now create a new cell and add:

+
print_me("Hi!")
+

Your notebook should now look something like this.

+
+

+
+

The focus of this tutorial is not on how to write Markdown or Python; you can make really pretty notebooks with Markdown and you can code whatever you want with Python. Rather, we will focus on the Jupyter Notebook features that allow you to do a little more than that.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • That a Jupyter notebook consists of a series of cells, and that they can be either markdown or code cells.
  • That we execute the code in a code cell with the kernel that we chose when opening the notebook.
  • We can run shell commands by prepending them with !.
  • A Jupyter notebook is simply a text file in JSON format.
+
+
+
+
+

5 Magics

+

Magics constitute a simple command language that significantly extends the power of Jupyter notebooks. There are two types of magics:

+
  • Line magics: Commands that are prepended by %, and whose arguments only extend to the end of the line.
  • Cell magics: Commands that start with %% and then apply to the whole cell. Must be written on the first line of a cell.
+

Now list all available magics with %lsmagic (which itself is a magic). You add a question mark to a magic to show the help (e.g. %lsmagic?). Some of them act as shortcuts for commonly used shell commands (%ls, %cp, %cat, ..). Others are useful for debugging and optimizing your code (%timeit, %debug, %prun, ..). For more information see the magics documentation.
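As a minimal example of a line magic, you could time a small expression directly in a code cell (any small piece of Python works here):

%timeit sum(range(1000))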

+

A very useful magic, in particular when using shell commands a lot in your work, is %%capture. This will capture the stdout/stderr of any code cell and store them in a Python object. Run %%capture? to display the help and try to understand how it works. Try it out with either some Python code, other magics or shell commands. Here is an example of how you can make it work:

+
%%capture output
+%%bash
+echo "Print to stdout"
+echo "Print to stderr" >&2
+

… And in another cell:

+
print("stdout:" + output.stdout)
+print("stderr:" + output.stderr)
+
+

Tip
You can capture the output of some magics directly like this: my_dir = %pwd.

+
+

The %%script magic is used for specifying a program (Bash, Perl, Ruby, ..) with which to run the code (similar to a shebang). For some languages it’s possible to use these shortcuts:

+
  • %%ruby
  • %%perl
  • %%bash
  • %%html
  • %%latex
  • %%R
+
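For instance, a cell using the %%bash shortcut could look something like this (a trivial example just to show the syntax):

%%bash
echo "Hello from bash"
ls | head -n 3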
+
+
+ +
+
+A note on R code +
+
+
+

In order to use the %%R magic you need to install the rpy2 extension, for example with Conda. This package is already installed in the jupyter-env environment you’re using for this tutorial. However, you also have to load it by running %load_ext rpy2.ipython in a cell.

+
+
+

Try this out if you know any of the languages above. Otherwise you can always try to print the quadratic formula with LaTeX!

+
\begin{array}{*{20}c} {x = \frac{{ - b \pm \sqrt {b^2 - 4ac} }}{{2a}}} & {{\rm{when}}} & {ax^2 + bx + c = 0} \\ \end{array}
+

Another useful magic is %precision which sets the floating point precision in the notebook. As a quick example, add the following to a cell and run it:

+
float(100/3)
+

Next set the precision to 4 decimal points by running a cell with:

+
%precision 4
+

Now run the cell with float(100/3) again to see the difference.

+

Running %precision without additional arguments will restore the default.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • The basics of Jupyter magics and the difference between line magics and cell magics
  • How to capture and use output from notebook cells with %%capture
  • How to use magics to run non-Python code in notebooks
+
+
+
+
+

6 Plotting

+

An essential feature of Jupyter Notebooks is of course the ability to visualize data and results via plots. A full guide to plotting in Python is beyond the scope of this course, but we’ll offer a few glimpses into the plotting landscape of Python.

+

First of all, Python has a library for plotting called matplotlib, which comes packed with functionality for creating high-quality plots. Below is an example of how to generate a line plot of a sine wave.

+
# Import packages
+import numpy as np
+import matplotlib.pyplot as plt
+# Generate a set of evenly spaced numbers between 0 and 100
+x = np.linspace(0,3*np.pi,100)
+# Use the sine function to generate y-values
+y = np.sin(x)
+# Plot the data
+line, = plt.plot(x, y, color='red', linestyle="-")
+

By default plots are rendered in the notebook as rasterised images which can make the quality poor. To render in scalable vector graphics format use the set_matplotlib_formats from the matplotlib_inline package:

+
import matplotlib_inline
+matplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'svg')
+

Now try running the code for the sine wave plot again.

+
+
+

7 Other packages for plotting

+

As we mentioned Matplotlib comes with a lot of functionality which is great because it allows you to create all sorts of plots and modify them exactly to your liking. However, this can also mean that creating very basic plots might involve a lot of cumbersome coding, when all you want is a simple bar chart!

+

Fortunately there are a number of Python packages that build upon matplotlib but with a much simplified interface. One such popular package is Seaborn. Below we’ll see how to generate a nice looking bar plot with error bars.

+

First import the Seaborn package (using an abbreviated name to simplify typing):

+
import seaborn as sns
+

Next we’ll load some example data of penguins collected at the Palmer Station, in Antarctica.

+
penguins = sns.load_dataset("penguins")
+# Look at first 5 lines of the data
+penguins.head(5)
+

The most basic way to generate a bar plot of this data with Seaborn is:

+
sns.barplot(data=penguins)
+

Simple right? Yes, but maybe not very informative. Here Seaborn simply calculates the mean of all numeric variables for the penguins and plots them with error bars representing a 95% confidence interval.

+

Let’s say that instead we want to plot the mean value of the body mass of the penguins at the different islands where they were examined.

+
sns.barplot(data=penguins, x="island", y="body_mass_g", errorbar="sd");
+

Here we specified to use values in the ‘island’ column as categories for the x-axis, and values in the ‘body_mass_g’ column as values for the y-axis. The barplot function of Seaborn will then calculate the mean body mass for each island and plot the bars. With errorbar="sd" we tell the function to draw the standard deviation as error bars, instead of computing a confidence interval.

+

If we instead want to visualize the data as a scatterplot we can use the sns.scatterplot function. Let’s plot the body mass vs bill length for all penguins and colour the data points by species. We’ll also move the legend outside of the plotting area and modify the x and y-axis labels:

+
# Store the matplotlib axes containing the plot in a variable called 'ax'
+ax = sns.scatterplot(data=penguins, x="bill_length_mm", y="body_mass_g",
+                     hue="species")
+# Modify the labels of the plot
+ax.set_xlabel("Bill length (mm)")
+ax.set_ylabel("Body mass (g)")
+# Set legend position outside of plot
+ax.legend(bbox_to_anchor=(1,1));
+

If you want to save a plot to file you can use the plt.savefig function. Add the following to the bottom of the cell with the scatterplot code:

+
plt.savefig("scatterplot.pdf", bbox_inches="tight")
+

The bbox_inches="tight" setting ensures that the figure is not clipped when saved to file.

+

The Seaborn website contains great tutorials and examples of other ways to plot data!

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • How to generate simple plots with matplotlib
  • How to import and use the Seaborn package for plotting
  • How to save plots from notebooks to a file
+
+
+
+
+

8 Widgets

+

Since we’re typically running our notebooks in a web browser, they are quite well suited for also including more interactive elements. A typical use case could be that you want to communicate some results to a collaborator or to a wider audience, and that you would like them to be able to modify how the results are displayed. It could, for example, be to select which gene to plot, or to see how some parameter value affects a clustering. Jupyter notebooks have great support for this in the form of widgets.

+

Widgets are eventful Python objects that have a representation in the browser, often as a control like a slider, text box, etc. These are implemented in the ipywidgets package.

+

The easiest way to get started with widgets is via the interact and interactive functions. These functions auto-generate widgets from functions that you define, and then call those functions when you manipulate the widgets. This might sound abstract so let’s look at an example.

+

Let’s take the scatterplot of the penguins dataset that we generated in the previous section and add widgets that lets us choose variables to plot as well as coloring of the points.

+

First we’ll import the interactive function from ipywidgets. Let’s also import the widgets module, which we’ll use later. Add the following code to a cell and run it:

+
from ipywidgets import interactive, widgets
+

Now, in a new cell, define a function called scatterplot with the code to generate the plot itself. Also add a palette argument to the function so that we can specify the colour palette to use for the plot. The function should look like this:

+
def scatterplot(x, y, hue, palette):
+    ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette)
+

Run the cell and create a new cell below it.

+

Next, we’ll use the interactive function to generate a widget to control the x, y, hue and palette arguments. The interactive function takes a function as its first argument, and then keyword arguments for each of the arguments in the function. The returned value is a widget which we will store in a variable called interactive_scatterplot. Add the following to a cell and run it:

+
interactive_scatterplot = interactive(scatterplot,
+            x=["bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g"],
+            y=["body_mass_g","bill_length_mm","bill_depth_mm","flipper_length_mm"],
+            hue=["species","island","sex"],
+            palette=["Set1","Set2","Dark2","Paired2"])
+

Importantly, all parameters defined in the scatterplot function must be given in the interactive call. The interactive_scatterplot widget is now tied to the scatterplot function. However, we still haven’t displayed the widget itself. To do that, simply add interactive_scatterplot to a new cell and run it:

+
interactive_scatterplot
+

This should show the scatterplot with drop-down menus for each of the arguments. Try changing the x and y variables to plot by selecting from the respective drop-downs. The hue drop-down now lets you change what variable to use for colouring the points and the palette drop-down changes the colour palette. As you can see, the available options in the drop-downs are the ones we specified in the interactive call.

+

Depending on the type of the passed argument different types of widgets will be created by interactive. For instance:

+
  • int or float arguments will generate a slider
  • bool arguments (True/False) will generate checkbox widgets
  • list arguments will generate a drop-down
  • str arguments will generate a text-box
+

Let’s add a slider to control the size of the points. In the Seaborn package this is controlled by the s argument to the scatterplot function. Modify the cell with your scatterplot function so it looks like this (remember to run the cell in order to update the function definition):

+
def scatterplot(x, y, hue, palette, size=50):
+    ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette, s=size)
+

Note that we added a size argument to the function and supplied it to the Seaborn scatterplot call with s=size. Setting size=50 in the function definition means that the default size of the points will be 50.

+

Now we need to add a slider for the size argument. Update the cell where we call the interactive function so that it looks like this, then run it:

+
interactive_scatterplot = interactive(scatterplot,
+            x=["bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g"],
+            y=["body_mass_g","bill_length_mm","bill_depth_mm","flipper_length_mm",],
+            hue=["species","island","sex"],
+            palette=["Set1","Set2","Dark2","Paired2"],
+            size=(20,100,10))
+

Here the size argument is defined as a tuple which sets the minimum value of the slider to 20, the maximum value to 100 and the step size to 10.

+

Finally, re-run the cell where we displayed the interactive_scatterplot widget. You should now see a slider for the size argument (starting at 50). Try changing the size of the points by moving the slider.

+

This is how it should look if everything works.

+

+

There are lots of widgets, e.g.:

+
  • Drop-down menus
  • Toggle buttons
  • Range sliders
  • File uploader
+

… And much, much more. Here is a list of all available widgets together with documentation and examples. Some of these widgets cannot be auto-generated by interactive, but fear not! Instead of relying on auto-generation we can define the widget and supply it directly to interactive.

+

To see this in practice, we’ll modify the scatterplot function to display a title and add a color picker widget that lets us set the color of the title text.

+

First, update the scatterplot function so that it looks like this:

+
def scatterplot(x, y, hue, palette, size, color):
+    ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette, s=size)
+    ax.set_title("Penguin scatterplot", color=color)
+

Then run the cell to update the function definition.

+

Next, we’ll define the colour picker widget. Add the definition to the cell where you defined the interactive_scatterplot then supply the widget to the interactive call. The cell should look like this:

+
colorpicker = widgets.ColorPicker(
+    concise=False,
+    description='Title color',
+    value='blue',
+    disabled=False
+)
+interactive_scatterplot = interactive(scatterplot,
+            x=["bill_length_mm","bill_depth_mm","flipper_length_mm","body_mass_g"],
+            y=["body_mass_g","bill_length_mm","bill_depth_mm","flipper_length_mm"],
+            hue=["species","island","sex"],
+            palette=["Set1","Set2","Dark2","Paired2"],
+            size=(20, 100, 10),
+            color=colorpicker)
+

Run the cell to update the widgets.

+

Finally, re-run the cell where we displayed the interactive_scatterplot. The plot should now have a title and you should see a new color picker below the slider for the point size. Try changing the title colour by clicking on the new color picker.

+
+
+
+ +
+
+Caution +
+
+
+

Note that you may have to close the colour picker once you’ve made your choice in order to make the plot update.

+
+
+
+
+

9 Other interactive plots

+

Jupyter widgets, like the ones we used here, are the most vanilla way of getting interactive graphs in Jupyter notebooks. Some other alternatives are:

+
  • altair is a plotting library that uses Vega-Lite grammar which is reminiscent of ggplot2 in R. The syntax is different from what we’ve shown here, but it’s very powerful once you get the hang of it.
  • Plotly is actually an API to a web service that renders your graph and returns it for display in your Jupyter notebook. Generates very visually appealing graphs, but from a reproducibility perspective it’s maybe not a good idea to be so reliant on a third party.
  • Bokeh is another popular tool for interactive graphs. Most plotting packages for Python are built on top of matplotlib, but Bokeh has its own library. This can give a steeper learning curve if you’re used to the standard packages.
  • mpld3 tries to integrate matplotlib with Javascript and the D3js package. It doesn’t scale well for very large datasets, but it’s easy to use and works quite seamlessly.
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • How to implement interactive widgets in notebooks
+
+
+
+
+

10 Extensions

+

Jupyter Notebook extensions are add-ons that can increase the functionality of your notebooks. Extensions include themes, editors, git support, renderers and much more. The most user-friendly way of managing extensions is via the Extension Manager available in the Jupyter lab interface. You can access it by clicking the puzzle piece icon in the left sidebar.

+
+
+
+ +
+
+Caution +
+
+
+

Note that the extension manager contains a disclaimer warning you that these third-party extensions are not reviewed for vulnerabilities which means that you should be careful about what extensions you install.

+
+
+

You can use the search field to perform a free text search for available extensions, then click ‘Install’ to install an extension. Note that in some cases you will be prompted to install additional packages.

+

While an in-depth listing of available extensions is well beyond the scope of this tutorial we offer this list of a few extensions that are of particular relevance to this course:

+ +
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
  • What Jupyter extensions are and how to manage them
+
+
+
+
+

11 Reproducibility

+

Now that you have a feeling for what Jupyter can do, we’ll spend a little time on things to consider specifically from a reproducibility point of view when it comes to Jupyter notebooks.

+
+
+

12 Version control of Jupyter notebooks

+

As we’ve seen, Jupyter notebooks are plain-text JSON files. This means that they can be version controlled with Git just like any other text file. However, because of the way Jupyter notebooks store their content, the diffs produced by Git can be difficult to interpret. Luckily, there are tools that can provide content-aware diffs and merge functionality for Jupyter notebooks.

+

One such tool is nbdime. nbdime is built to understand the structure of Jupyter notebooks and can therefore generate diffs that are easier to read. It can also be used to merge changes made to notebooks, which is great especially when collaborating on notebooks with others.

+

nbdime is already installed in the jupyter-env Conda environment you are using for this tutorial. To try it in action, create a new notebook and name it Analysis.ipynb. Add the following code to the first cell, then run it:

+
import numpy as np
+import seaborn as sns
+penguins = sns.load_dataset("penguins")
+

This simply imports some python modules and loads a dataset.

+

Save the notebook. Now we’ll add and commit the new notebook to the Git repository:

+
git add Analysis.ipynb
+git commit -m "Add Analysis notebook"
+

So far so good. And nothing new here compared to what we’ve already learned about version control. Now let’s make some changes to the notebook. First we’ll replace one of the loaded modules. Update the first cell of the notebook so that it reads:

+
import pandas as pd
+import seaborn as sns
+penguins = sns.load_dataset("penguins")
+

Then create a new cell where we’ll calculate the mean of each numeric value grouped by species. In the new cell, add the following code:

+
penguins.groupby("species").mean(numeric_only=True)
+

Run the cell and save the notebook.

+

Now use git diff to view the changes we’ve made to the notebook. Run:

+
git diff Analysis.ipynb
+

Even with very minor modifications to the notebook the diff will contain numerous lines that are difficult to interpret. This is because the notebook not only contains the code, but also cell metadata and output (in this case a table produced by the second cell).

+

Now let’s generate a more easy-to-read diff. Run:

+
nbdiff -s Analysis.ipynb
+

This will use the nbdiff tool that comes with nbdime to show an inline diff of the notebook. The -s flag tells nbdiff to only show differences for the actual code changes, ignoring changes in metadata and output. There are a number of flags you can use here to customise the diff. The uppercase version of each flag will ignore the respective change type. For example, to see the diff but ignore changes to the output of cells you can run:

+
nbdiff -O Analysis.ipynb
+

nbdime also comes with a graphical web-based diff viewer. To try it, run:

+
nbdiff-web Analysis.ipynb
+

This will open up a tab in your web browser showing you changes made to the notebook side-by-side for each cell, including also cell output. This makes it easy to see changes made both to code and outputs such as tables and plots.

+
+

12.1 Other tools for version control of notebooks

+
  • You can also install the nbdime jupyter lab extension to get access to the diff functionality directly from the Jupyter lab interface. If you also install the jupyterlab-git extension you can both view diffs and commit changes directly from Jupyter lab.
  • VS Code actually comes with built-in support for both Jupyter notebooks and Git, so that you can view informative diffs directly from the editor.
+
+
+
+

13 Making sure notebooks work as expected

+

One of the great things with Jupyter notebooks is the ability to do data exploration in an interactive way. Because loaded data, defined variables and functions remain in the notebook until you restart the kernel, you can easily make changes to your analysis and re-run cells to see the effect of the changes immediately. However, this can also be a source of errors and inconsistencies if you, during your work, modify or use variables in cells upstream of their initial definition.

+

The nbval package can help you catch these types of errors. nbval is a plugin for the pytest testing framework that can be used to test Jupyter notebooks. It works by executing each cell in the notebook and comparing the output to the output stored in the notebook. If the output is the same, the test passes. If the output differs, the test fails. nbval is also pre-installed in the jupyter-env Conda environment you’re using for this tutorial.

+

As an example, we’ll keep working with the Analysis.ipynb notebook we’ve created.

+

Let’s say we want to estimate the size of the bill of penguins using the bill_length_mm and bill_depth_mm columns. We’ll do this by adding a new cell to our notebook with the following code:

+
penguins["bill_size"] = (penguins["bill_length_mm"] * penguins["bill_depth_mm"])
+

Run the cell and add a new one below it. In the new cell, output the mean of each column grouped by island using the following code:

+
penguins.groupby("island").mean(numeric_only=True)
+

Run the cell to see the output. Looks good. Now we have a very simple example of some exploratory analyses on a dataset.

+

Save the notebook and try running nbval on it to see if it works as expected. From the command line, run:

+
pytest --nbval Analysis.ipynb
+

nbval tests each cell in your notebook by executing it and comparing the output to the output stored in the notebook. If the output is the same, the test passes. The output of the test should look something like this:

+
collected 4 items
+
+Analysis.ipynb ....                                                                                                   [100%]
+
+========== 4 passed in 1.93s ==========
+

Now let’s say we realize that we want to normalize the bill_size values by the body mass of the penguins. We’ll just modify the cell where we calculated this value, introducing a small piece of code to divide by the body_mass_g column.

+

Change the third cell of the notebook so that it reads:

+
penguins["bill_size"] = (penguins["bill_length_mm"] * penguins["bill_depth_mm"]) / penguins["body_mass_g"]
+sns.scatterplot(data=penguins, x="bill_size", y="flipper_length_mm", hue="island")
+

Re-run the cell and save the notebook. So far so good! Let’s test the notebook again with nbval. Just like before, run it from the command line with:

+
pytest --nbval Analysis.ipynb
+

If you’ve followed the instructions, this second run of nbval should generate a FAILED test, showing something like:

+
==================== short test summary info ====================
+FAILED Analysis.ipynb::Cell 3
+================== 1 failed, 3 passed in 1.83s ==================
+

What happened here was that we modified the cell where we calculated the bill_size value, but we didn’t re-run the cell where we output the mean of each column grouped by island. This means that the output of the last cell in the notebook now differs from what is actually stored in the notebook variables. This type of error can be difficult to spot, especially if you have a large notebook with many cells. Luckily, nbval can help us here.

+
+
+
+ +
+
+Note +
+
+
+

Note that nbval reports cell numbers using 0-based numbering, so when the test fails on Cell 3 it actually refers to the 4th cell in the notebook.

+
+
+

This problem could have been solved if we had re-run the cell where we output the mean of each column grouped by island. In fact, it is good practice to re-run all cells in a notebook before saving it. If you also restart the kernel before re-running, you make sure that you haven’t introduced any ‘hidden states’.

+
+
+
+ +
+
+Ignoring specific cells +
+
+
+

One caveat of nbval is that it doesn’t work well with cells that generate plots. You can tell nbval to ignore the output of specific cells by adding # NBVAL_IGNORE_OUTPUT to the top of a cell.
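For example, to make nbval skip the output comparison for the scatterplot cell we added earlier, that cell could start like this (a minimal sketch; the marker comment is all that nbval needs):

# NBVAL_IGNORE_OUTPUT
sns.scatterplot(data=penguins, x="bill_size", y="flipper_length_mm", hue="island")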

+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

  • How to use nbdime to view diffs of Jupyter notebooks
  • How to use nbval to test that notebooks work as expected

+
+
+
+
+

14 Converting notebooks

+

Notebooks can be converted to various output formats such as HTML, PDF, LaTeX etc. directly from the File -> Save and Export Notebook As… menu.

+

Conversion can also be performed on the command line using the jupyter nbconvert command, which is installed together with the jupyter Conda package.

+

The syntax for converting a Jupyter notebook is:

+
jupyter nbconvert --to <FORMAT> notebook.ipynb
+

Here <FORMAT> can be any of asciidoc, custom, html, latex, markdown, notebook, pdf, python, rst, script, slides. Converting to some output formats (e.g. PDF) may require you to install separate software such as Pandoc or a TeX environment.

+

Try converting the jupyter-tutorial.ipynb notebook that you have been working on for this tutorial to HTML using jupyter nbconvert.

+
+
+
+ +
+
+Tip +
+
+
+

If the plots in the HTML-rendered version of your notebook are not displayed properly, try changing the matplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'svg') line to matplotlib_inline.backend_inline.set_matplotlib_formats('retina').

+
+
+

nbconvert can also be used to run a Jupyter notebook from the command line by running:

+
jupyter nbconvert --execute --to <FORMAT> notebook.ipynb
+

nbconvert executes the cells in a notebook, captures the output and saves the results in a new file. Try running it on the jupyter-tutorial.ipynb notebook.

+

You can also specify a different output file with --output <filename>.

+

So in order to execute your jupyter-tutorial.ipynb notebook and save it to a file named report.html you could run:

+
jupyter nbconvert --to html --output report.html --execute jupyter-tutorial.ipynb
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to convert Jupyter notebooks to various other formats
  • +
  • How to use nbconvert to convert notebooks on the command line
  • +
+
+
+
+
+

15 Notebooks and Quarto

+

You may have noticed that a lot of the functionality in Jupyter is overlapping with Quarto. And you may be wondering which one to use. This is a difficult question to answer as it will depend on your use-case and personal preference. As such, any answer will be subjective, but we’ll try to give you some pointers on how to get the best out of both worlds.

+

While similar in some ways Jupyter and Quarto are not completely overlapping. Quarto is great for generating high-quality reports and manuscripts, and is agnostic to the programming language used. Jupyter on the other hand is great for interactive data analysis and exploration with a more direct connection between code and output. While Jupyter is also somewhat agnostic to programming language, it is most commonly used with Python and with both the Jupyter and Python ecosystem at its back it can be customized with a lot of different extensions and plugins.

+

The good news is that the two can be used together allowing you to get the best of both. For example, you may like the professional look of rendered Quarto documents but really like the interactive and exploratory nature of Jupyter. Well you can simply work as you normally do in Jupyter and then use Quarto to render the notebook to a high-quality report or manuscript.

+

To give you an example, take a look at the supplementary_material.ipynb file in the jupyter/ tutorial directory. Open this notebook in the Jupyter lab interface (make sure you have activated the jupyter-env Conda environment).

+

As you can see this notebook contains some brief descriptions in Markdown and code to generate a few plots. It uses the output from the MRSA case-study Snakemake workflow you worked on in the Snakemake tutorial. This is a common use-case for Jupyter notebooks; to generate summary statistics and plots from the results of a workflow run. (A real-world example could of course include a lot more in-depth exploratory analyses).

+

Now, let’s say you want to share the results of this notebook with your PI or collaborators. We could simply share the notebook file, or, as we saw in the previous section, convert it to HTML or PDF via jupyter nbconvert.

+

Let’s do that first so we have something to compare with. Run the following:

+
jupyter nbconvert --to html --output supplementary_material.nbconvert.html supplementary_material.ipynb
+

Open the supplementary_material.nbconvert.html file in a browser to see that it looks like you expect. This looks more or less like the original notebook.

+

Now let’s go one step further and render the notebook to a high-quality report using Quarto. We can actually add a YAML header to the notebook with some document options that Quarto understands. Create a new cell in the notebook (from the Jupyter lab interface) and move it to the top. In this cell, add the following:

+
---
+title: Supplementary material
+subtitle: Supplementary tables and plots for the MRSA study
+format:
+    html:
+        embed-resources: true
+        code-fold: true
+        code-tools: true
+language:
+  code-summary: Click to show code
+bibliography: references.bib
+---
+

Set the cell type to Markdown, then run the cell. Most likely that cell will look rather weird but that’s OK. We’ll fix that in a bit.

+

Save the notebook and now render the document with Quarto from the command line:

+
quarto render supplementary_material.ipynb
+

Open up the supplementary_material.html file in a browser and compare it to the supplementary_material.nbconvert.html file. You should see that the Quarto version looks a lot better. The fact that Quarto supports rendering of Jupyter notebooks means you can keep editing your notebooks as you normally would and use Quarto for rendering the final document. Also there’s very little we had to change in the notebook to make it work with Quarto. If you look closely at the code cells used to generate the plots and table you’ll see that they contain code-chunk options in the same form we used in the Quarto tutorial. These options do not impact the notebook when run in Jupyter, making it easy to use the two tools in combination.
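As a sketch of what such cell options can look like, here is a hypothetical plot cell reusing the penguins data from earlier in this tutorial (the label and caption are made up for illustration and are not part of the supplementary material notebook):

#| label: fig-bill-size
#| fig-cap: "Bill size versus flipper length, coloured by island"
sns.scatterplot(data=penguins, x="bill_size", y="flipper_length_mm", hue="island")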

+

Let’s go back to the YAML header cell and fix how it looks in the Jupyter notebook. The reason it looks weird is that Jupyter doesn’t understand the syntax. But luckily there’s a Jupyter lab Quarto extension you can install to fix this. Click the extension icon in the left sidebar and search for quarto. Install the jupyterlab-quarto extension and then reload the page. Now the YAML header should look a lot better.

+

Try adding more options to the header to customize the look of the rendered document. For instance you could:

+
    +
  • add a table of contents (toc: true)
  • +
  • try out different themes
  • +
  • add your name as author (author: Your Name)
  • +
  • add a date (date: last-modified)
  • +
+

and much more.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use Quarto to render Jupyter notebooks to high-quality reports.
  • +
+
+
+
+
+

16 Extra material

+

The following material contains some additional tips and tricks on how to use Jupyter notebooks. This is not part of the core of the Jupyter material and you can choose what you want to go through, or skip it entirely.

+

Here are some useful resources if you want to read more about Jupyter in general:

+ +
+
+

17 Running Jupyter notebooks on a cluster

+
    +
  • Log in to Uppmax, making sure to use a specific login node, e.g. rackham1:
  • +
+
ssh <your-user-name>@rackham1.uppmax.uu.se
+
    +
  • Create/activate a Conda environment containing jupyter, e.g.:
  • +
+
conda create -n jupyter -c conda-forge jupyter
+
    +
  • activate the environment, then run:
  • +
+
jupyter notebook --no-browser
+

When the Jupyter server starts up you should see something resembling:

+
[I 2023-11-13 22:15:36.944 ServerApp] Serving notebooks from local directory: <path-to-your-directory>
+[I 2023-11-13 22:15:36.944 ServerApp] Jupyter Server 2.10.0 is running at:
+[I 2023-11-13 22:15:36.944 ServerApp] http://localhost:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6
+[I 2023-11-13 22:15:36.944 ServerApp]     http://127.0.0.1:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6
+[I 2023-11-13 22:15:36.944 ServerApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).
+

Now a Jupyter notebook server is running on the Uppmax end. The line that says:

+
[I 2023-11-13 22:15:36.944 ServerApp] http://localhost:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6
+

This line contains information on the port used on the server side (8888 in this case) and the token required to use the server (25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6).

+

The next step is to use this information to log in to the server from your local computer.

+

On your local computer

+

In a terminal, run the following command to start port forwarding of port 8080 on your local computer to the remote port on the Uppmax side. Replace <remote-port> with the port given when you started the server on Uppmax, and <your-user-name> with your user name on Uppmax.

+
ssh -N -L localhost:8080:localhost:<remote-port> <your-user-name>@rackham1.uppmax.uu.se
+

The port forwarding is active for as long as this process is running. To disable it, simply interrupt it with CTRL + C.

+

Connect to the Jupyter server by opening localhost:8080 in your browser. When prompted, paste the token you got when starting the server on Uppmax and set a new password.

+
+
+

18 Using Binder to share interactive notebooks

+

Binder is a service that allows you to share Jupyter notebooks with others, while also allowing them to run the notebooks in the browser. This is great if you wish to share an analysis and have others interact with the code and results, without them having to install anything locally. What you will need is:

+
    +
  1. A public GitHub repository containing the notebooks you want to share.
  2. +
  3. An environment.yml file in the repository containing the Conda environment required to run the notebooks.
  4. +
  5. Data files (if any) required to run the notebook(s).
  6. +
+

Binder will then create a Docker image containing the Conda environment and the notebooks, and run a Jupyter server on this image. The Docker image is then hosted on the Binder server and can be used by anyone with the link to the repository to run the notebooks interactively in their browser.
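The exact contents of the environment.yml file depend on what your notebooks need, but a minimal sketch for a notebook using pandas and seaborn could look something like this (the Python version and package list here are only an example):

channels:
  - conda-forge
dependencies:
  - python=3.11
  - jupyter
  - pandas
  - seaborn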

+

To show you an example we’ve created a basic GitHub repository containing the supplementary_material.ipynb notebook from the previous section. If you go to the repository you will see a badge saying “launch binder”, click this to start the Binder server. This will take a few minutes the first time you do it, but after that it should be faster. When the server is ready you will be presented with the now familiar Jupyter interface. Go ahead and open up the supplementary_material.ipynb notebook and run it.

+

You can now interact with the notebook as you would if you had it running on a local Jupyter server. You can change the code, run it, and see the results. You can also add new cells and write new code. However, you cannot save the changes you make to the notebook.

+

To read more about Binder and how to use it, see the Binder documentation. For pointers on how to make data available to the notebooks you share via Binder, see this guide on Accessing data in your Binder.

Working with Nextflow

How to create reproducible workflows and computational pipelines

Published

15-Oct-2024

1 Introduction

+

Nextflow is a workflow management system (WfMS), and is one of the most common such systems within the bioinformatics and academic communities. These systems are important for scientific reproducibility in that they greatly facilitate keeping track of which files have been processed in what way throughout an entire project.

+

Nextflow is built from the ground up to be portable, scalable, reproducible and platform-agnostic. This means that any workflow you write in Nextflow can be run locally on your laptop, on a computer cluster or on a cloud service (as long as your platform has the necessary computational resources). You can also define the compute environment in which each task is carried out on a per-task basis. You might thus develop your workflow on your local computer using a minimal test dataset, but run the full analyses with all samples on e.g. a computer cluster. Nextflow can work on both files and arbitrary values, oftentimes connected in useful and advanced ways.

+

Nextflow can easily work with dynamic inputs where the exact output is unknown, e.g. the exact number of files or which samples pass some arbitrary quality control threshold. While Nextflow is based on the Groovy language, you don’t need to know how to code Groovy to be able to write good Nextflow workflows. Nextflow has a large community centred around it, including the nf-core curated collection of high quality pipelines used by e.g. the National Genomics Infrastructure.

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/nextflow and activate your nextflow-env Conda environment.

+
+
+

2 The basics

+

We’ll start by creating a very simple workflow from scratch, to show how Nextflow works: it will take two input files and convert them to UPPERCASE letters.

+
    +
  • Start by running the following commands:
  • +
+
touch main.nf
+echo "This is a.txt" > a.txt
+echo "This is b.txt" > b.txt
+

Open the main.nf file with an editor of your choice. This is the main workflow file used in Nextflow, where workflows and their processes are defined.

+
    +
  • Copy the following code into your main.nf file:
  • +
+
// Workflow definition
+workflow {
+    // Define input files
+    ch_input = Channel.fromPath( "a.txt" )
+
+    // Run workflow
+    CONVERT_TO_UPPER_CASE( ch_input )
+}
+
+// Process definition
+process CONVERT_TO_UPPER_CASE {
+    publishDir "results/",
+        mode: "copy"
+
+    input:
+    path(file)
+
+    output:
+    path("a.upper.txt")
+
+    script:
+    """
+    tr [a-z] [A-Z] < ${file} > a.upper.txt
+    """
+}
+

Here we have two separate parts. The first is the workflow definition, while the second is a process definition. Let’s go through them both in more detail!

+
+
+
+ +
+
+Nextflow comments +
+
+
+

Double-slashes (//) are used for comments in Nextflow.

+
+
+
+
+
+ +
+
+Nextflow and whitespace +
+
+
+

Nextflow is not indentation-sensitive. In fact, Nextflow doesn’t care at all about whitespace, so go ahead and use it in whatever manner you think is easiest to read and work with! Do keep in mind that indentation and other types of whitespace do improve readability, so it’s generally not a good idea to forego them entirely, even though you can.

+
+
+
+

2.1 Workflow definitions

+
workflow {
+    // Define input files
+    ch_input = Channel.fromPath( "a.txt" )
+
+    // Run workflow
+    CONVERT_TO_UPPER_CASE( ch_input )
+}
+

The workflow definition here has two parts, each doing an important job for any Nextflow workflow. The first part defines a channel, which is an asynchronous first-in-first-out stream of data that connects a workflow’s various inputs and outputs. In simpler terms, channels contain the data that you want to process with the workflow and can be passed between the various parts of the workflow.

+

Channels can be created in various different ways using channel factories, depending on what type of data you want to put into them and where this data is stored. In this particular case we define our ch_input channel using the .fromPath channel factory, which takes a file path as input - here we use the a.txt file. You can thus read ch_input = Channel.fromPath("a.txt") as “create the channel ch_input and send the file a.txt into it”.
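Channels are not limited to files; as a small aside (not needed for this workflow), the of channel factory creates a channel from plain values given directly:

// A channel containing three string values
ch_letters = Channel.of( "x", "y", "z" )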

+
+
+
+ +
+
+Naming channels +
+
+
+

A channel can be named anything you like, but it is good practice to prepend them with ch_, as that makes it clear which variables are channels and which are just normal variables.

+
+
+

How do we use these channels then? Channels pass data to and from processes through our workflow. By providing channels as arguments to processes, we describe how we want data to flow. This is exactly what we do in the second part: we call our CONVERT_TO_UPPER_CASE process with the ch_input as input argument - this is very similar to functional programming.

+

This is our entire workflow, for now: the creation of a channel followed by using the contents of that channel as input to a single process. Let’s look at how processes themselves are defined!

+
+
+

2.2 Process definitions

+
process CONVERT_TO_UPPER_CASE {
+    publishDir "results/",
+        mode: "copy"
+
+    input:
+    path(file)
+
+    output:
+    path("a.upper.txt")
+
+    script:
+    """
+    tr [a-z] [A-Z] < ${file} > a.upper.txt
+    """
+}
+

Looking at the process in the code above, we can see several parts. The process block starts with its name, in this case CONVERT_TO_UPPER_CASE, followed by several sections, or directives as Nextflow calls them: publishDir, input, output and script.

+
+
+
+ +
+
+Naming processes +
+
+
+

A process can be named using any case, but a commonly used convention is to use UPPERCASE letters for processes to visually distinguish them in the workflow. You do not have to follow this if you don’t want to, but we do so here.

+
+
+

Let’s start with the first directive: publishDir. This tells Nextflow where the output of the process should be placed when it is finished. Setting mode to "copy" just means that we want to copy the output files to the publishing directory, rather than using a symbolic link (which is the default).

+

The input and output directives describe the data expected to come through this specific process. Each line of input describes the data expected for each process argument, in the order used in the workflow. In this case, CONVERT_TO_UPPER_CASE expects a single channel (one line of input), and expects the data to be filenames ( i.e. of type path). The script directive is where you put the code that the process should execute.

+

Notice that there is a difference between how the inputs and outputs are declared: the output is an explicit string (i.e. surrounded by quotes), while the input is a variable named file. This means inputs can be referenced in the process without naming the data explicitly, unlike the output, where the name needs to be explicit. We’ll get back to exactly how this works in just a moment. While the name of the input variable here is chosen to be the descriptive file, we could also have chosen something completely different, e.g. banana (we’d also have to change its reference in the script directive).

+
+
+

2.3 Executing workflows

+

Let’s try running the workflow we just created!

+
    +
  • Type the following in your terminal:
  • +
+
nextflow run main.nf
+

This will make Nextflow run the workflow specified in your main.nf file. You should see something along these lines:

+
N E X T F L O W  ~  version 22.10.6
+Launching `./main.nf` [mad_legentil] - revision: 87f0c253ed
+executor >  local (1)
+[32/9124a1] process > CONVERT_TO_UPPER_CASE (1) [100%] 1 of 1 ✔
+

The first few lines are information about this particular run, including the Nextflow version used, which workflow definition file was used, a randomly generated run name (an adjective and a scientist), the revision ID as well as where the processes were executed (locally, in this case, as opposed to e.g. SLURM or AWS).

+

What follows next is a list of all the various processes for this particular workflow. The order does not necessarily reflect the order of execution (depending on each process’ input and output dependencies), but they are in the order they were defined in the workflow file - there’s only the one process here, of course. The first part (e.g. [32/9124a1]) is the process ID, which is also the first part of the subdirectory in which the process is run (the full subdirectory will be something like 32/9124a1dj56n2346236245i2343, so just a longer hash). We then get the process and its name. Lastly, we get how many instances of each process are currently running or have finished. Here we only have the one process, of course, but this will soon change.

+
    +
  • Let’s check that everything worked: type ls results/ and see that it contains the output we expected.

  • +
  • Let’s explore the working directory: change into whatever directory is specified by the process ID (your equivalent to work/32/9124a1[...]).

  • +
+

What do you see when you list the contents of this directory? You should see a symbolic link named a.txt pointing to the real location of this file, plus a normal file a.upper.txt, which is the output of the process that was run in this directory. You generally only move into these work directories when debugging errors in your workflow, and Nextflow has some tricks to make this process a lot easier - more on this later.

+

So, in summary: we have three components: a set of inputs stored in a channel, a set of processes and a workflow that defines which processes should be run in what order. We tell Nextflow to push the inputs through the entire workflow, so to speak.

+
    +
  • Now it’s your turn! Move back to the workflow root and make it use only the b.txt input file and give you the b.upper.txt instead.

  • +
  • Run your workflow and make sure it works before you move on; check below if you’re having trouble.

  • +
+
+ +
+
+
ch_input = Channel.fromPath( "b.txt" )
+
+
+
+
+
+

2.4 Viewing channel contents

+

Something that’s highly useful during development of Nextflow workflows is to view the contents of channels, which can be done with the view() operator.

+
    +
  • Add the following to your workflow definition (on a new line) and execute the workflow: ch_input.view(). What do you see?

  • +
  • Remove the view() operator once you’re done.

  • +
+

It can be quite helpful to view the channel contents whenever you’re unsure of what a channel contains or if you’ve run into some kind of bug or error, or even just when you’re adding something new to your workflow. Remember to view the channel contents whenever you need to during the rest of this tutorial!
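For reference, the view() call can go on any line after the channel has been created, for example (a sketch of the workflow definition with the operator added):

workflow {
    // Define input files
    ch_input = Channel.fromPath( "a.txt" )

    // Print the channel contents, e.g. /path/to/a.txt
    ch_input.view()

    // Run workflow
    CONVERT_TO_UPPER_CASE( ch_input )
}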

+
+
+

2.5 Files and sample names

+

One powerful feature of Nextflow is that it can handle complex data structures as input, and not only filenames. One of the more useful things this allows us to do is to couple sample names with their respective data files inside channels.

+
    +
  • Change the channel definition to the following:
  • +
+
ch_input = Channel
+    .fromPath ( "a.txt" )
+    .map      { file -> tuple(file.getBaseName(), file) }
+

Here we use the map operator to create a tuple (something containing multiple parts) from the base name of the file (a) and the file path (a.txt). The statement .map{ file -> tuple(file.getBaseName(), file) } can thus be read as “replace the channel’s contents with a tuple containing the base name and the file path”. The contents of the channel thus change from [a.txt] to [a, a.txt]. Passing the sample name or ID together with the sample data in this way is extremely useful in a workflow context and can greatly simplify downstream processes.

+

Before this will work, however, we have to change the process itself to make use of this new information contained in the ch_input channel.

+
    +
  • Change the process definition to the following:
  • +
+
process CONVERT_TO_UPPER_CASE {
+    publishDir "results/",
+        mode: "copy"
+
+    input:
+    tuple val(sample), path(file)
+
+    output:
+    path("${sample}.upper.txt")
+
+    script:
+    """
+    tr [a-z] [A-Z] < ${file} > ${sample}.upper.txt
+    """
+}
+

Notice how the input now is aware that we’re passing a tuple as input, which allows us to use both the file variable (as before) and the new sample variable. All that’s left now is to change the input to our pipeline!

+
    +
  • Change the channel definition line from .fromPath ( "a.txt" ) to .fromPath ( ["a.txt", "b.txt"] ) and try running the pipeline. Make sure it works before you move on! Remember to use the view() operator if you want to inspect the channel contents in detail.
  • +
+
+
+

2.6 Input from samplesheets

+

So far we’ve been specifying inputs using strings inside the workflow itself, but hard-coding inputs like this is not ideal. A better solution is to use samplesheets instead, e.g. comma- or tab-separated data files; this is standard for many pipelines, including nf-core. Take, for example, the following CSV file:

+
a,a.txt
+b,b.txt
+

This specifies the samples and their respective files on each row. Using such a file is much more portable, scalable and overall easier to use than simply hard-coding things in the workflow definition itself. We might also include an arbitrary number of additional metadata columns, useful for downstream processing and analyses. Using the contents of files as input can be done using the .splitCsv() and .map{} operators, like so:

+
ch_input = Channel
+    .fromPath ( "first_samplesheet.csv" )
+    .splitCsv ( )
+    .map      { row -> tuple(row[0], file(row[1])) }
+

The .splitCsv() operator lets the channel know the input is a CSV file, while the .map{} operator turns each CSV row into a tuple of its first and second elements.

+
    +
  • Change the input channel definition to the code above and create the first_samplesheet.csv file as shown above.

  • +
  • Add the .view() operator somewhere to show the contents of ch_input.

  • +
  • Execute the pipeline. Do you see what you expect? Remove the .view() operator before moving on.

  • +
+
+
+
+ +
+
+Note +
+
+
+

While we are still hard-coding the name of the samplesheet, it is much better to edit a samplesheet than to edit the pipeline itself - there are also convenient ways to work around this using parameters, which we’ll talk more about later in this tutorial.

+
+
+

We can also specify a header in our samplesheet like so: .splitCsv(header: true). This will allow us to reference the columns using their names instead of their index, e.g. row.col1 instead of row[0].

+
    +
  • Add an appropriate header to your samplesheet, make sure your workflow can read it and execute. Use .view() to see what’s going on, if needed.
  • +
+
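If you get stuck, one possible solution (assuming you name the header columns sample and file; any names work as long as the map{} operator uses the same ones) could look like this:

// first_samplesheet.csv now starts with a header line:
// sample,file
// a,a.txt
// b,b.txt
ch_input = Channel
    .fromPath ( "first_samplesheet.csv" )
    .splitCsv ( header: true )
    .map      { row -> tuple(row.sample, file(row.file)) }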
+
+

2.7 Adding more processes

+

It’s time to add more processes to our workflow! We have the two files a.upper.txt and b.upper.txt; the next part of the workflow is a step that concatenates the content of all these UPPERCASE files.

+

We already have a channel containing the two files we need: the output of the CONVERT_TO_UPPER_CASE process called CONVERT_TO_UPPER_CASE.out. We can use this output as input to a new process using the syntax: CONVERT_TO_UPPER_CASE.out.collect(). The collect() operator groups all the outputs in the channel into a single data object for the next process. This is a many-to-one type of operation: a stream with several files (many) is merged into a lone list of files (one). If collect() was not used, the next process would try to run a task for each file in the output channel.
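If you want to see this difference for yourself, you could temporarily add view() calls (optional, using only operators we’ve already covered):

CONVERT_TO_UPPER_CASE.out.view()            // emits one item per file, e.g. a.upper.txt and b.upper.txt
CONVERT_TO_UPPER_CASE.out.collect().view()  // emits a single item: [a.upper.txt, b.upper.txt]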

+

Let’s put this in use by adding a new process to the workflow definition. We’ll call this process CONCATENATE_FILES and it will take the output from CONVERT_TO_UPPER_CASE as input, grouped using the collect() operator.

+
    +
  • Add a line to your workflow definition for this new process with the appropriate input - remember that you can use .view() to check channel contents; click below if you’re having trouble.
  • +
+
+ +
+
+
CONCATENATE_FILES( CONVERT_TO_UPPER_CASE.out.collect() )
+
+
+
+

Now all we have to do is define the actual CONCATENATE_FILES process in the process definition section.

+
    +
  • Copy the following code as a new process into your workflow:
  • +
+
process CONCATENATE_FILES {
+    publishDir "results/",
+        mode: "copy"
+
+    input:
+    path(files)
+
+    output:
+    path("*.txt")
+
+    script:
+    """
+    cat ${files} > concat.txt
+    """
+}
+
    +
  • Run your workflow again and check the results/ directory. At this point you should have three files there: a.upper.txt, b.upper.txt and concat.txt.

  • +
  • Inspect the contents of concat.txt - do you see everything as you expected?

  • +
+

Note the use of path(files) as input. Although we pass a list of files as input, the list is considered a single object, and so the files variable references a list. Each file in that list can be individually accessed using an index e.g. ${files[0]}, or as we do here, use the variable without an index to list all the input files.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learnt:

+
    +
  • How to create, execute and extend workflows
  • +
  • How to explore the work directory and channel contents
  • +
  • How to couple sample names to sample data files
  • +
  • How to use samplesheets as input
  • +
  • How to collect multiple files as single inputs for processes
  • +
+
+
+
+
+
+

3 Executing workflows

+

It’s time to start working with a more realistic workflow using the MRSA case study of this course! We’ve created a bare-bones version of this pipeline for you, but we’ll work our way through it as we go along and learn more about Nextflow’s features and functionality. The MRSA workflow looks like this:

+
workflow {
+
+    // Workflow for generating count data for the MRSA case study
+
+    // Get input files from a samplesheet
+    ch_input = Channel
+        .fromPath ( "samplesheet.csv" )
+        .splitCsv ( header: true)
+
+    // Define the workflow
+    DOWNLOAD_FASTQ_FILES (
+        ch_input
+    )
+    RUN_FASTQC (
+        DOWNLOAD_FASTQ_FILES.out
+    )
+    RUN_MULTIQC (
+        RUN_FASTQC.out[1].collect()
+    )
+    GET_GENOME_FASTA ()
+    INDEX_GENOME (
+        GET_GENOME_FASTA.out.fasta
+    )
+    ALIGN_TO_GENOME (
+        DOWNLOAD_FASTQ_FILES.out,
+        INDEX_GENOME.out.index
+    )
+    SORT_BAM (
+        ALIGN_TO_GENOME.out.bam
+    )
+    GET_GENOME_GFF3 ()
+    GENERATE_COUNTS_TABLE (
+        SORT_BAM.out.bam.collect(),
+        GET_GENOME_GFF3.out.gff
+    )
+}
+

The workflow has one input channel named ch_input, which reads input from the samplesheet.csv file. We then define the processes to be executed by this workflow, nine in total. The first process (DOWNLOAD_FASTQ_FILES) takes the ch_input channel as input, while the rest of the processes take the output of previous processes as input. Before we go into more detail regarding the ins-and-outs of this workflow, let’s start with some specifics of how workflows are executed and what you can get from them.

+
+

3.1 Reports and visualisations

+

Let’s start with running the workflow plus getting some reports and visualisation while we’re at it!

+
    +
  • Run the workflow using the following command: nextflow run main_mrsa.nf -with-report report.html -with-timeline timeline.html -with-dag dag.png.
  • +
+

After successful execution, you will find three more files in your current directory: report.html, timeline.html and dag.png. The first file contains a workflow report, which includes various information regarding execution such as runtime, resource usage and details about the different processes. The second file contains a timeline showing how long each individual process took to execute, while the last contains a visualisation of the workflow itself.

+

Take a few minutes to browse these files for yourself. When running a workflow you can of course choose which of these additional files you want to include by picking which ones are important or interesting to you - or don’t include any!

+
+
+

3.2 Logs

+

Nextflow keeps a log of all the workflows that have been executed. Let’s check it out!

+
    +
  • Type nextflow log to get a list of all the executions.
  • +
+

Here we get information about when the workflow was executed, how long it ran, its run name, whether it succeeded or not and what command was used to run it. You can also use nextflow log <run name> to show the directory of each task that was executed for that run, and supply the -f (or -fields) flag to specify additional fields to show.

+
    +
  • Run nextflow log <run name> -f hash,name,exit,status
  • +
+

This shows us not only the beginning of each task’s working directory, but also its name, exit code and status (i.e. if it completed successfully or failed in some manner).

+
+
+
+ +
+
+Listing fields +
+
+
+

If you want to see a complete list of all the fields you might explore using the log, just type nextflow log -l or nextflow log -list-fields. This is highly useful for debugging when there’s some specific information about a run you’re particularly interested in!

+
+
+

We can also get even more detailed information about the latest run by looking into the .nextflow.log file!

+
    +
  • Look into the latest log by typing less .nextflow.log.
  • +
+

You’ll be greeted by a wealth of debugging information, which may even seem a bit overkill at this point! This level of detail is, however, quite useful both as a history of what you’ve attempted and as an additional help when you run into errors! Also, it helps with advanced debugging - which we’ll get into later.

+
+
+

3.3 Re-running workflows

+

Something you often want to do in Nextflow (or any WfMS for that matter) is to re-run the workflow when you changed some input files or some of the code for its analyses, but you don’t want to re-run the entire workflow from start to finish. Let’s find out how this works in Nextflow!

+
    +
  • Run the same nextflow run main_mrsa.nf command again.
  • +
+

You’ll notice that Nextflow actually re-ran the entire workflow from scratch, even though we didn’t change anything. This is the default behaviour of Nextflow.

+
    +
  • Let’s try that again: nextflow run main_mrsa.nf -resume instead.
  • +
+

Now you can see that Nextflow didn’t actually re-run anything. The -resume flag instructed Nextflow to use the cached results from the previous run!

+

Nextflow automatically keeps track of not only changes to input files, but also changes to code, process definitions and scripts. You can thus change anything relating to your workflow and just re-run with the -resume flag and be sure that only processes relevant to your changes are executed again!

+
    +
  • Use tree work/ to list the contents of the work directory.
  • +
+

Because Nextflow keeps track of all the runs, we’ve now got two sets of files in the work directory. One set from the first run, and another from the second run. This can take up valuable space, so let’s clean that up.

+
    +
  • Use nextflow clean -n -before <run_name> to show which work directories will be cleaned up (use nextflow log to find the run name if you don’t remember it). Then delete those directories by changing -n (dry-run) to -f (force).
  • +
+

Nextflow’s clean subcommand can be used to clean up failed tasks and unused processes. Here we used the -before flag, meaning that any runs before the specified run are removed; use nextflow help clean to see other options for cleaning. This is the preferred way to clean up the working directory.

+
    +
  • Remove the results directory and re-run the workflow again using the -resume flag.
  • +
+

We removed all the results we used before, but we still managed to resume the workflow and use its cache - how come? Remember that Nextflow uses the work directory to run all of its tasks, while the results directory is just where we have chosen to publish our outputs. We can thus delete the results directory as often as we like (a necessity when output filenames are changed) and still get everything back without having to re-run anything. If we were to delete the work directory, however…

+
    +
  • Delete the work directory and re-run the workflow using the -resume flag.
  • +
+

There is no longer any cache for Nextflow to use, so it re-runs from the start! This is good to keep in mind: you can always delete the output directories of your workflow, but if you mess with work you’ll lose, well… work!

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learnt:

+
    +
  • How to get automatic reports and visualisations
  • +
  • How to check the Nextflow logs
  • +
  • How to re-run workflows
  • +
  • How to clean the Nextflow cache
  • +
+
+
+
+
+
+

4 Working with processes

+

Now that we’ve gone through the specifics of executing workflows in a bit more detail, let’s go through working with processes. While there are numerous process directives that can be used, we’ll go through some of the more commonly used ones here.

+
+

4.1 Tags

+

Let’s look at the command line output we got during the workflow’s execution, which should look something like this:

+
N E X T F L O W  ~  version 22.10.6
+Launching `./main.nf` [friendly_bhaskara] - revision: b4490b9201
+executor >  local (17)
+[c9/e5f818] process > DOWNLOAD_FASTQ_FILES (SRR935092) [100%] 3 of 3 ✔
+[d5/b5f24e] process > RUN_FASTQC (SRR935092)           [100%] 3 of 3 ✔
+[91/2cea54] process > RUN_MULTIQC                      [100%] 1 of 1 ✔
+[e0/b4fd37] process > GET_GENOME_FASTA                 [100%] 1 of 1 ✔
+[87/32ce10] process > INDEX_GENOME                     [100%] 1 of 1 ✔
+[56/e9a460] process > ALIGN_TO_GENOME (SRR935092)      [100%] 3 of 3 ✔
+[ed/d8c223] process > SORT_BAM (SRR935092)             [100%] 3 of 3 ✔
+[e7/4a6bda] process > GET_GENOME_GFF3                  [100%] 1 of 1 ✔
+[e9/84f093] process > GENERATE_COUNTS_TABLE            [100%] 1 of 1 ✔
+

Have you noticed that there are SRA IDs after some of the processes? Well, if you look at which processes show these SRA IDs you might see that it’s only those processes that are executed three times, i.e. once per SRA ID. This doesn’t happen automatically, however, and comes from something called tags. Let’s look at the DOWNLOAD_FASTQ_FILES process:

+
process DOWNLOAD_FASTQ_FILES {
+
+    // Download a single-read FASTQ file from the SciLifeLab Figshare remote
+
+    tag "${sra_id}"
+    publishDir "results/data",
+        mode: "copy"
+
+    input:
+    tuple val(sra_id), val(figshare_link)
+
+    output:
+    tuple val(sra_id), path("*.fastq.gz")
+
+    script:
+    """
+    wget ${figshare_link} -O ${sra_id}.fastq.gz
+    """
+}
+

You can see the tag directive at the very top of the process definition. Tags can be used to e.g. show information about the sample currently being analysed by the process. This is useful both during run-time (allowing you to see which sample is being processed) but also for debugging or finding problematic samples in case of errors or odd output. There is, naturally, no need to use tags for processes which are only run once.

+
    +
  • Comment out (prefix with //) the tag directive from the DOWNLOAD_FASTQ_FILES process and run the workflow again. What do you see?
  • +
+

Without the tag directive you should instead see the numbers 1 through 3, representing the input files (of which there are three). Nextflow still tells us that it’s working on one of the input files, but it’s generally much more useful to actually see the sample name or ID, rather than just a number.

+
    +
  • Uncomment the tag directive before you move on.
  • +
+
+
+

4.2 Named outputs

+

Let’s move on to the next process! It looks like this:

+
process RUN_FASTQC {
+
+    // Run FastQC on a FASTQ file.
+
+    tag "${sample}"
+    publishDir "results/",
+        mode: "copy"
+
+    input:
+    tuple val(sample), path(fastq)
+
+    output:
+    path("*.html")
+    path("*.zip")
+
+    script:
+    """
+    fastqc ${fastq} -q
+    """
+}
+

Here is a process with two output channels! One contains all the .html files, while the other contains all the .zip files. How is this handled in the workflow definition of downstream processes that use the outputs? The RUN_MULTIQC process uses this output, and its part in the workflow definition looks like this:

+
RUN_MULTIQC (
+    RUN_FASTQC.out[1].collect()
+)
+

We already know about .out and .collect(), but we have something new here: the RUN_MULTIQC process is taking the second channel of the output from the RUN_FASTQC process - [1] is the index for the second channel, as Groovy is zero-based (the first channel is indexed by [0]).

+

This comes with some issues, however. What if we accidentally changed the order of the outputs in the process, or added a new one? Using positions like this is easy to mess up, but there is a better solution: named outputs! This can be achieved by adding the emit option for some or all of the outputs, like so:

+
output:
+path("*.txt"), emit: text
+

Instead of referring to the output by its position in an array as before, we refer to the channel with a label we choose (.out.text) instead. This benefits us in that we can infer more about the channel contents from a label like text than from an index like [1], and it also makes us less error-prone when rewriting parts of a workflow.

+
    +
  • Your turn! Add named outputs to the RUN_FASTQC process and make RUN_MULTIQC use those outputs. You’ll have to change both the output section of the RUN_FASTQC process, and the workflow definition section for RUN_MULTIQC. If you need help, see the hint below.
  • +
+
+ +
+
+
// Workflow definition for RUN_MULTIQC
+RUN_MULTIQC (
+    RUN_FASTQC.out.zip.collect()
+)
+
+// Output section of RUN_FASTQC
+output:
+path("*.html"), emit: html
+path("*.zip"),  emit: zip
+
+
+
+

Check if it works by executing the workflow.

+
+
+

4.3 Advanced publishing

+

So far we’ve only used the publishDir directive in a very simple way: specifying a directory and the mode to use when publishing (to copy the files rather than symbolically link them). There are more things you can do, however, especially for processes with more than one output. For example, we can publish outputs in separate directories, like so:

+
publishDir "results/tables",
+    pattern: "*.tsv",
+    mode: "copy"
+publishDir "results/logs",
+    pattern: "*.log",
+    mode: "copy"
+

In this example, *.tsv files are copied to the folder results/tables/, while *.log files are copied to the folder results/logs. The publishDir directive can be used multiple times in a single process, allowing one to separate output as above, or publish the same output to multiple folders.

+
    +
  • Edit the RUN_FASTQC process to place the HTML and compressed files in separate directories. Remove the results directory and re-run the workflow to check that it worked - click below if you’re having trouble.
  • +
+
+ +
+
+
process RUN_FASTQC {
+
+    (...)
+
+    publishDir "results/fastqc/html",
+        pattern: "*.html",
+        mode: "copy"
+    publishDir "results/fastqc/zip",
+        pattern: "*.zip",
+        mode: "copy"
+
+    (...)
+}
+
+
+
+
+

Note that an output and a published output are different things: something can be an output of a process without being published. In fact, the RUN_FASTQC process is a prime example of this! Think about the compressed output: this output is only used by the downstream process RUN_MULTIQC and is never meant to be viewed by a human or used by a human in some downstream task not part of the pipeline itself. We would thus like to keep the compressed files as an output, but not publish said output. How do we do this? Just remove the corresponding publishDir directive!

+

The MRSA workflow we’ve made here was refactored directly from its original version in the Snakemake tutorial of this course, which means that its output structure is not fully taking advantage of some of Nextflow’s functionality. The compressed output we’ve already talked about above is one example.

+
    +
  • See if you can find any other processes in the current implementation of the MRSA workflow that you could optimise like this!
  • +
+

Think about whether all processes actually need to have published outputs. Make sure you test executing the workflow after you’ve made any changes; click below if you want a hint.

+
+ +
+
+

The GET_GENOME_FASTA and GET_GENOME_GFF3 processes both download reference files which are only needed by the workflow itself and do not need to be published; the same goes for the genome index generated by the INDEX_GENOME process.

+

One could argue that neither of the BAM files generated by the ALIGN_TO_GENOME and SORT_BAM processes is needed by the user if only the final counts table is of interest, but BAM files can also be useful for exploring the alignments in e.g. IGV. Publishing both BAM files is, however, definitely not needed: only the sorted one should be published if one is interested in BAM files.

+
+
+
+
+
+

4.4 Debugging

+

It is, sadly, inevitable that we all make mistakes while coding - nobody’s perfect! Nextflow helps you quite a bit when this happens, not just with its logs but also with informative error messages. Let’s introduce an error and look at what we get:

+
    +
  • Change the final output line in the RUN_MULTIQC process to the following and re-run the workflow: path("multiqc_general_stats.csv") - notice the usage of .csv rather than .txt as before.
  • +
+

We got an error! We get a number of things, actually, including (in order from the top) the name of the process that gave the error, the likely cause, the command that was executed, along with its exit status, output, error and the work directory that the task was run in. Let’s focus on the Caused by: part at the top, which should look something like this:

+
Caused by:
+  Missing output file(s) `multiqc_general_stats.csv` expected by process `RUN_MULTIQC`
+

We can also see that the command’s exit status is 0, which means that the command was successful; any exit status other than 0 means there was an error of some kind. We can thus infer that the command (1) worked, but (2) failed to give us the output expected by Nextflow. Thankfully, Nextflow graciously prints the work directory for us so that we may check out what happened in more detail.

+
    +
  • Copy the working directory path, cd into it and list its contents using ls.
  • +
+

You might already have spotted the error in the message above; the error we introduced here was that the expected output file has a .csv extension, rather than the correct .txt. Nextflow is expecting the .csv output, but the process script directive is (correctly) giving us the .txt file, which we can see inside the process’ work directory.

+
    +
  • Go back to the root directory, revert the error you introduced and re-run the workflow to make sure it works again.
  • +
+

This might have seemed like a trivial error, but a lot of errors in Nextflow can be solved in the same manner, i.e. by just following the debugging output reported by Nextflow and inspecting the specific subdirectory in question.

+
+
+
+ +
+
+A note about Bash +
+
+
+

If you are using Bash variables inside the script directive you have to be careful to prepend them with a backslash, e.g. \${BASH_VARIABLE}. This is because the dollar sign is used by Nextflow for its own variables, so you have to tell Nextflow explicitly when you’re using a Bash variable. This is a common source of errors, so keeping it in mind can save you some debugging time!
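As a minimal sketch (a hypothetical process, not part of the MRSA workflow), note the difference between the escaped Bash variable and the Nextflow variable below:

process COUNT_LINES {

    input:
    path(infile)

    output:
    path("linecount.txt")

    script:
    """
    # N_LINES is a Bash variable, so its dollar signs are escaped;
    # ${infile} is a Nextflow variable and is left as-is
    N_LINES=\$(wc -l < ${infile})
    echo "${infile} has \${N_LINES} lines" > linecount.txt
    """
}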

+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learnt:

+
    +
  • How to use the tag directive
  • +
  • How to use named output with emit
  • +
  • How to publish outputs into different directories
  • +
  • How to debug errors and mistakes
  • +
+
+
+
+
+
+

5 Workflow configuration

+

We’ve so far been working with a relatively non-generalised workflow: it’s got hard-coded inputs, paths and genome references. This is perfectly fine for a project that is purely aimed at getting reproducible results (which is the full extent of what you want in a lot of cases), but it can be made a lot more generalisable. Let’s go through the MRSA workflow and see what can be improved!

+
+

5.1 Parameters

+

One of the things that allow generalisability of Nextflow workflows is parameters, which hold information and values that can be changed directly on the command-line at the time of execution. One use of parameters in our MRSA workflow is to remove the hard-coded results output directory, for example. Parameters can be written in the following form:

+
params {
+    parameter_1 = "some/data/path"      // A string parameter
+    parameter_2 = 42                    // A value parameter
+    parameter_3 = ["a", "b", "c", "d"]  // A list parameter
+}
+

You would then refer to these parameters using e.g. params.parameter_1 anywhere you need to in the workflow. Although parameters can be defined in main_mrsa.nf, it is preferable to define them in a separate configuration file. The default name of this file is nextflow.config and if such a file is present it will be used automatically by Nextflow (to supply a config file with another name use nextflow -c <path-to-config-file> run main_mrsa.nf)

+
    +
  • Create a configuration file and add a parameter for the results output directory.

  • +
  • Use your newly created parameter in the publishDir directive of a process. Run your workflow to see if it worked; click below if you need help.

  • +
+
+ +
+
+
// Configuration file
+params {
+    outdir = "results"
+}
+
+// A publishDir directive in a process
+publishDir "${params.outdir}",
+    mode: "copy"
+
+
+
+
+
+

5.2 Command line parameters

+

Workflow parameters can be assigned on the command-line by executing workflows like so: nextflow run main_mrsa.nf --parameter_name 'some_value'. The workflow parameter parameter_name is prefixed by a double dash -- to tell Nextflow this is a parameter to the workflow (a single dash is a parameter to Nextflow, e.g. -resume). The value is also quoted (this is important for parameters that take file paths as values).

+
    +
  • Run your workflow using the parameter you previously created, but pick something other than the default value!
  • +
+

You should now have a new directory containing all the results! This is highly useful if you want to keep track of separate runs of a workflow with different software parameters, for example: nextflow run main.nf --important_param 'value1' --resultsdir 'results-value1', or simply want to keep the results of separate versions of the same workflow. You can also change parameters by using the -params-file option or by using another configuration file (and using -c), rather than on the command line!
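As a sketch of the -params-file option (the file name and contents here are just an example, reusing the outdir parameter from above), you could put the parameters in a small YAML file:

# params.yml
outdir: "results-value1"

and then run:

nextflow run main_mrsa.nf -params-file params.yml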

+
+
+

5.3 Configuring inputs

+

Remember the input for the MRSA workflow, the ch_input channel? This input (the samplesheet.csv file) is hard-coded inside the main_mrsa.nf file. This could also be made into a parameter!

+
    +
  • Change the definition of the ch_input channel to take the value of a new parameter of your choice, defined in the configuration file.
  • +
+

You should now have a more generalised input to your workflow! Try to run it to make sure it works - look below if you need some help.

+
+ +
+
+
// Channel definition
+ch_input = Channel
+    .fromPath ( params.input )
+    .splitCsv ( header: true )
+
+// Configuration file
+params {
+    input = "samplesheet.csv"
+}
+
+
+
+

By specifying inputs from sample sheets like this we can change inputs of a workflow execution by creating another sample sheet and specifying e.g., --input samplesheet-2.csv on the command line. This is highly useful when you want to run a single sample e.g., when testing a workflow, or when you want to keep track of all the different inputs you’ve used historically.

+
+
+

5.4 Other configuration scopes

+

There are lots of things that you might want to add to your configuration, not just parameters! The workflow manifest, for example, which might look like this:

+
manifest {
+    name        = "My Workflow"
+    description = "My awesome workflow, created by me"
+    author      = "Me"
+    mainScript  = "main.nf"
+    version     = "1.0.0"
+}
+
    +
  • Go ahead and add a workflow manifest to your nextflow.config file!
  • +
+

The manifest is useful when you’re publishing or sharing the workflow through e.g. GitHub or similar. There are many more such configuration scopes that you might want to use - read more about them in the documentation.
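One commonly used example (a sketch, not something this tutorial requires) is the process scope, which can set defaults such as the number of CPUs and the amount of memory for all processes in the workflow:

process {
    cpus   = 2
    memory = 4.GB
}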

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we learnt:

+
    +
  • How to create parameters in a configuration file
  • +
  • How to specify parameters on the command line
  • +
  • How to add workflow manifest and other configuration scopes
  • +
+
+
+
+
+
+

6 Optimising the MRSA workflow

+

We just added several parameters and configurations to our MRSA workflow, but we didn’t do anything about the reference genomes: those are still hard-coded. The current MRSA workflow is, in fact, not very well-optimised for Nextflow at all, being a refactor from the Snakemake tutorial of this course.

+

All of the processes are basically unchanged, excluding some minor alterations. For example, the run_fastqc rule in Snakemake used the -o flag to specify that the results should be in the current directory, followed by moving the output files to their respective output directory. The first part is not needed in Nextflow (as everything is run in its own subdirectory), and the second part is done by the publishDir directive. These are just minor alterations, though, but we can do much more if we fully utilise Nextflow’s features!

+
+

6.1 Remote files

+

One of these features is the ability to automatically download remote files, without needing to explicitly do so! The path input type can handle either file paths (like we’ve done so far) or URIs with a supported protocol (such as http://, s3://, ftp://, etc.). This would be highly useful for e.g. the GET_GENOME_FASTA process - in fact, we don’t even need that process at all! All we need to do is to change the input to the INDEX_GENOME and ALIGN_TO_GENOME processes.

+
    +
  • Create a new input channel using the fromPath() channel factory and the absolute path (the FTP address) to the genome FASTA.

  • +
  • Make the INDEX_GENOME process use that input channel instead of the previously used output of the GET_GENOME_FASTA process.

  • +
  • Remove the GET_GENOME_FASTA process, as it is not needed anymore.

  • +
+

Re-run the workflow to see if it worked. Check the code below for an example if you’re stuck:

+
+ +
+
+
// Channel creation
+ch_genome_fasta = Channel.fromPath( "ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz" )
+
+// Workflow definition
+INDEX_GENOME (
+    ch_genome_fasta
+)
+
+
+
+

We could also do this using parameters from our config file, of course!

+
    +
  • Now change the input to the GENERATE_COUNTS_TABLE to use the remote GFF3 file and remove the GET_GENOME_GFF3 in the same manner as above, but using a new parameter instead.
  • +
+

Re-run the workflow again to make sure it worked; check below if you’re stuck.

+
+ +
+
+
// [ nextflow.config ]
+params {
+    genome_gff3 = "ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.37.gff3.gz"
+}
+
+// [ main.nf ]
+// Channel creation
+ch_genome_gff3 = Channel.fromPath ( params.genome_gff3 )
+
+// Workflow definition
+GENERATE_COUNTS_TABLE (
+    SORT_BAM.out.bam.collect(),
+    ch_genome_gff3
+)
+
+
+
+

If we want to get even more detailed we can also change the hard-coded “NCTC8325” naming in e.g. the INDEX_GENOME process and put that in another parameter, or grab the baseName() from the channel and make a [prefix, file] tuple using the map{} operator like we did previously; check below if you’re curious about how this could be done.

+
+ +
+
+
// Channel definition
+ch_genome_fasta = Channel
+    .fromPath( "ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz" )
+    .map     { file -> tuple(file.getBaseName(), file) }
+
+// INDEX_GENOME process definition
+process INDEX_GENOME {
+
+    publishDir "results/bowtie2/",
+        mode: "copy"
+
+    input:
+    tuple val(fasta_name), path(fasta)
+
+    output:
+    path("*.b2t"), emit: index
+
+    script:
+    """
+    # Bowtie2 cannot use .gz, so unzip to a temporary file first
+    gunzip -c ${fasta} > tempfile
+    bowtie2-build tempfile ${fasta_name}
+    """
+}
+
+
+
+
+
+

6.2 Subworkflows

+

The DSL2 allows highly modular workflow design, where a workflow may contain multiple subworkflows. A subworkflow is just like a normal workflow, but it can be called inside other workflows, similar to a process. There is thus no formal difference between a subworkflow and a workflow; the difference lies only in how you use them in practice. Let’s take a look at a toy example:

+
workflow {
+    ch_input = Channel.fromPath ( params.input )
+    SUBWORKFLOW (
+        ch_input
+    )
+}
+
+workflow SUBWORKFLOW {
+
+    take:
+    input_file
+
+    main:
+    ALIGN_READS( input_file )
+
+    emit:
+    bam = ALIGN_READS.out.bam
+}
+

Here we have an unnamed, main workflow like before, plus a named subworkflow. A workflow can have inputs specified by the take directive, which is the equivalent of process input for workflows. The main part is the workflow body, which contains how to run which processes in which order. The last part, emit, also works the same as for processes, in that we name the different outputs of the workflow so that we may use them in other workflows or processes. Nextflow will run the unnamed workflow by default, unless the -entry flag is specified, like so:

+
nextflow run main.nf -entry SUBWORKFLOW
+

This will run the workflow named SUBWORKFLOW, but nothing else. You can also store subworkflows in separate files, so that everything doesn’t have to be crammed into a single main.nf file. A subworkflow named SUBWORKFLOW contained in the file subworkflow.nf can be loaded into a main.nf file like so:

+
include { SUBWORKFLOW } from "./subworkflow.nf"
+

If you have a complex workflow with several subworkflows you might thus store them in a separate directory, e.g. subworkflows/. This allows you to have fine-grained control over the general architecture of your Nextflow workflows, organising them in a manner that is easy to code and maintain. A process can also be treated in the same manner, and defined separately in another file.
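For example, assuming you keep processes in a processes/ directory (a hypothetical layout), including one could look like this:

// Include a single process from a separate file
include { RUN_FASTQC } from "./processes/run_fastqc.nf"

// The same definition can be included again under another name
include { RUN_FASTQC as RUN_FASTQC_TRIMMED } from "./processes/run_fastqc.nf"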

+
    +
  • Now it’s your turn! Separate the RUN_FASTQC and RUN_MULTIQC processes out of the main workflow and into a subworkflow. Check below if you’re having trouble.
  • +
+
+ +
+
+
// [ main.nf ]
+// Include subworkflow
+include { QUALITY_CONTROLS } from "./subworkflows/quality_controls.nf"
+
+// Main workflow
+QUALITY_CONTROLS (
+    DOWNLOAD_FASTQ_FILES.out
+)
+
+// [ subworkflows/quality_controls.nf ]
+// Quality controls subworkflow
+workflow QUALITY_CONTROLS {
+
+    take:
+    fastq
+
+    main:
+    RUN_FASTQC (
+        fastq
+    )
+    RUN_MULTIQC (
+        RUN_FASTQC.out.zip.collect()
+    )
+
+    emit:
+    html          = RUN_MULTIQC.out.html
+    general_stats = RUN_MULTIQC.out.general_stats
+}
+
+// [ Include RUN_FASTQC and RUN_MULTIQC processes here ]
+
+
+
+

If you want to challenge yourself, try to do the same with the INDEX_GENOME, ALIGN_TO_GENOME and SORT_BAM processes! Be careful of where you get your inputs and outputs; check below if you want one of the ways in which you can do this:

+
+ +
+
+
// [ main.nf ]
+// Include subworkflow
+include { ALIGNMENT } from "./subworkflows/alignment.nf"
+
+// Main workflow
+ALIGNMENT (
+    ch_genome_fasta,
+    DOWNLOAD_FASTQ_FILES.out
+)
+
+// [ subworkflows/alignment.nf ]
+// Alignment subworkflow
+workflow ALIGNMENT {
+
+    take:
+    fasta
+    fastq
+
+    main:
+    INDEX_GENOME (
+        fasta
+    )
+    ALIGN_TO_GENOME (
+        fastq,
+        INDEX_GENOME.out.index
+    )
+    SORT_BAM (
+        ALIGN_TO_GENOME.out.bam
+    )
+
+    emit:
+    bam = SORT_BAM.out.bam
+}
+
+// [ Include INDEX_GENOME, ALIGN_TO_GENOME and SORT_BAM processes here ]
+
+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we learnt:

+
    +
  • How to automatically download remote files
  • +
  • How to create and work with subworkflows
  • +
+
+
+
+
+
+

7 Extra material

+

There are many more things you can do with Nextflow than covered here. If you are interested to learn more details about Nextflow, we will briefly show some of its advanced features in this section. But first, here are some links to additional resources on Nextflow:

+ +
+

7.1 Using containers in Nextflow

+

Nextflow has built-in support for using both Docker and Apptainer containers (and others too), either with a single container for the workflow as a whole or separate containers for each individual process. The simplest way to do it is to have a single container for your entire workflow, in which case you simply run the workflow and specify the image you want to use, like so:

+
# Run with docker
+nextflow run main.nf -with-docker image-name
+
+# Run with Apptainer
+nextflow run main.nf -with-apptainer image.sif
+

If you don’t want to supply this at every execution, you can also add it directly to your configuration file:

+
// Docker configuration
+process.container = 'image-name'
+docker.enabled = true
+
+// Apptainer configuration
+process.container = 'path/to/image.sif'
+apptainer.enabled = true
+

If you instead would like to have each process use a different container you can use the container directive in your processes:

+
process PROCESS_01 {
+    (...)
+    container 'image_01'
+    (...)
+}
+
+process PROCESS_02 {
+    (...)
+    container 'image_02'
+    (...)
+}
+

Regardless of which solution you go for, Nextflow will execute all the processes inside the specified container. In practice, this means that Nextflow will automatically wrap your processes and run them by executing the Docker or Apptainer command with the image you have provided.

+
+
+

7.2 Using Conda in Nextflow

+

While you can execute Nextflow inside Conda environments just like you would any other type of software, you can also use Conda with Nextflow in the same way as for Docker and Apptainer above. You can either supply an environment.yml file, the path to an existing environment or the packages and their versions directly in the conda directive, like so:

+
process PROCESS_01 {
+    (...)
+    conda 'mrsa-environment.yml'
+    (...)
+}
+process PROCESS_02 {
+    (...)
+    conda 'path/to/mrsa-env'
+    (...)
+}
+process PROCESS_03 {
+    (...)
+    conda 'bioconda::bwa=0.7.17 bioconda::samtools=1.13'
+    (...)
+}
+

You can use either of the methods described above with your configuration file as well, here exemplified using an environment.yml file:

+
process.conda = 'mrsa-environment.yml'
+
+
+

7.3 Running Nextflow on Uppmax

+

A lot of researchers in Sweden are using the Uppmax computer cluster in Uppsala, which is easily handled by Nextflow. What you need to do is to add the following profile to your nextflow.config file:

+
profiles {
+    // Uppmax general profile
+    uppmax {
+        params {
+            account        = null
+        }
+        process {
+            executor       = 'slurm'
+            clusterOptions = "-A '${params.account}'"
+            memory         = { 6.GB * task.attempt }
+            cpus           = { 1 * task.attempt }
+            time           = { 10.h * task.attempt }
+            scratch        = '$SNIC_TMP'
+            errorStrategy  = 'retry'
+            maxRetries     = 1
+        }
+    }
+}
+

This will add a profile to your workflow, which you can access by running the workflow with -profile uppmax. You will also have to supply an extra parameter account which corresponds to your SNIC project account, but the rest you can leave as-is, unless you want to tinker with e.g. compute resource specifications. That’s all you need! Nextflow will take care of communications with SLURM (the system used by Uppmax, specified by the executor line) and will send off jobs to the cluster for you, and everything will look exactly the same way as if you were executing the pipeline locally.
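Running the workflow with this profile could then look something like the following (the account string is just a placeholder for your own compute project):

nextflow run main_mrsa.nf -profile uppmax --account "snic2022-X-XXX"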

+

The memory, cpus and time lines define the various resources Nextflow will use, as well as how much to automatically increase them by when retrying failed tasks; this retry behaviour is, in turn, specified by the errorStrategy and maxRetries variables. The scratch variable defines where each node’s local storage is situated, which gives Nextflow optimal access to the Uppmax file system for temporary files.

+
+
+

7.4 Advanced channel creation

+

The input data shown in the MRSA example workflow is not that complex, but Nextflow channels can do much more than that. A common scenario in high-throughput sequencing is that you have pairs of reads for each sample. Nextflow has a special, built-in way to create channels for this data type: the fromFilePairs channel factory:

+
ch_raw_reads = Channel
+    .fromFilePairs ( "data/*_R{1,2}.fastq.gz" )
+

This will create a channel containing all the reads in the data/ directory in the format <sample>_R1.fastq.gz and <sample>_R2.fastq.gz and will pair them together into a nested tuple looking like this:

+
[sample, [data/sample_R1.fastq.gz, data/sample_R2.fastq.gz]]
+

The first element of the tuple ([0]) thus contains the value sample, while the second element ([1]) contains another tuple with paths to both read files. This nested tuple can be passed into processes for e.g. read alignment, and it makes the entire procedure of going from read pairs (i.e. two separate files, one sample) into a single alignment file (one file, one sample) very simple. For more methods of reading in data see the Nextflow documentation on Channel Factories.
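To make this concrete, here is a minimal sketch of a process that could consume such a channel; the process name and the aligner command inside it are hypothetical placeholders:

// A hypothetical process consuming the [sample, [R1, R2]] tuples
process ALIGN_READS {

    input:
    tuple val(sample), path(reads)

    output:
    path("${sample}.sam"), emit: sam

    script:
    """
    # 'my_aligner' is a placeholder, not a real tool
    my_aligner --prefix ${sample} --reads ${reads} > ${sample}.sam
    """
}

// In the workflow block it could be called like so:
// ALIGN_READS ( ch_raw_reads )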

+

We can also do quite advanced things to manipulate data in channels, such as this:

+
samples_and_treatments = Channel
+    .fromPath ( params.metadata )
+    .splitCsv ( sep: "\t", header: true )
+    .map      { row -> tuple("${row.sample_id}", "${row.treatment}") }
+    .filter   { id, treatment -> treatment != "DMSO" }
+    .unique   ( )
+

That’s a bit of a handful! But what does it do? The first line specifies that we want to read some data from a file specified by the metadata parameter, and the second line actually reads that data using tab as delimiter, including a header. The map operator takes each entire row and subsets it to only two columns: the sample_id and treatment columns (discarding the other columns). This subset is stored as a tuple. The filter operator is then used to remove any tuples where the second entry (treatment) is not equal to the string "DMSO" (i.e. untreated cells, in this example). Finally, we only keep unique tuple values. Let’s say that this is the metadata we’re reading:

+
sample_id     dose    group     treatment
+sample_1      0.1     control   DMSO
+sample_1      1.0     control   DMSO
+sample_1      2.0     control   DMSO
+sample_2      0.1     case      vorinostat
+sample_2      1.0     case      vorinostat
+sample_2      2.0     case      vorinostat
+sample_3      0.1     case      fulvestrant
+sample_3      1.0     case      fulvestrant
+sample_3      2.0     case      fulvestrant
+

Given the channel creation strategy above, we would get the following result:

+
[sample_2, vorinostat]
+[sample_3, fulvestrant]
+

In this way, you can perform complex operations on input files or input metadata and send the resulting content to your downstream processes in a simple way. Composing data manipulations in Nextflow like this can be half the fun of writing the workflow. Check out Nextflow’s documentation on Channel operators to see the full list of channel operations at your disposal.

+
+
+

7.5 Using Groovy in processes

+

You don’t have to limit yourself to Bash or external scripts inside your processes: Nextflow is based on Groovy, which allows you to mix Groovy and Bash in the same process. For example, have a look at this:

+
process index_fasta {
+    tag "${fasta_name}"
+
+    input:
+    tuple val(fasta), path(fasta_file)
+
+    output:
+    path("${fasta_name}.idx"), emit: fasta
+
+    script:
+    fasta_name = fasta.substring(0, fasta.lastIndexOf("."))
+    """
+    index --ref ${fasta_file},${fasta_name}
+    """
+}
+

Here we have some command index that, for whatever reason, requires both the path to a FASTA file and the name of that file without the .fasta extension. We can use Groovy in the script directive together with normal Bash, mixing and matching as we like. The first line of the script directive gets the name of the FASTA file without the extension by removing anything after the dot, while the second calls the index command like normal using bash.

+
+
+

7.6 The nf-core pipeline collection

+

You may have heard of the nf-core pipeline collection previously, which is a large, collaborative bioinformatics community dedicated to building, developing and maintaining Nextflow workflows. In fact, if you have sequenced data at e.g. the National Genomics Infrastructure (NGI), you can be sure that the data processing has been run using one of the nf-core pipelines! While the community only started in 2018 (with a Nature Biotechnology paper in 2020), it already has over 30 production-ready pipelines covering everything from genomics and transcriptomics to proteomics and metagenomics - and more are being developed all the time.

+

The nf-core pipelines all work in the same way, in that they have the same exact base for inputs, parameters and arguments, making them all highly similar to run. Since you’ve already learnt the basics of Nextflow in this course, you should now be able to also run the nf-core pipelines! It might be that you have a data type that you can analyse using one of the pipelines in nf-core, meaning you don’t need to do anything other than find out what parameters you should run it with.

+

Each pipeline comes with extensive documentation and test datasets that you can use to practice on, and can be run on HPCs like Uppmax, cloud services like AWS, or locally on your own computer. All pipelines support both Conda and Docker/Apptainer, and you can additionally run specific versions of the pipelines, allowing for full reproducibility of your analyses. If you want to check nf-core out, simply head over to their list of pipelines and see what’s available! Who knows, you might even write your own nf-core pipeline in the future?
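Running an nf-core pipeline is typically just a one-liner; for example, something like the following could run the nf-core/rnaseq pipeline on its bundled test data (the pinned version is just an example - check the pipeline’s documentation for current releases and required parameters):

nextflow run nf-core/rnaseq -r 3.14.0 -profile test,docker --outdir results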

+ + +
+
+ +
+ +
+ + + + + + + \ No newline at end of file diff --git a/pages/quarto.html b/pages/quarto.html new file mode 100644 index 00000000..ca87f6b3 --- /dev/null +++ b/pages/quarto.html @@ -0,0 +1,1352 @@ + + + + + + + + + +Working with Quarto + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+
+

Working with Quarto

+

How to generate reproducible reports and computational notebooks

+
+
+ + +
+ + +
+
Published
+
+

15-Oct-2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + + +
+

1 Introduction

+

The Quarto format (.qmd) is a multi-functional format, which is especially useful for scientific coding and analyses. Quarto documents can be used both to save and execute code as well as to generate reports in various output formats. This is done by mixing markdown and so-called code chunks in the same document (we have course materials for markdown if you are unfamiliar with this format). The code itself as well as the output it generates can be included in the final report. Not only does Quarto work great for scientific coding, but it can also be used for things such as presentations and websites - this entire workshop website is, in fact, created using only Quarto!

+

Quarto makes your analysis more reproducible by connecting your code, figures and descriptive text. You can use it to make reproducible reports, rather than e.g. copy-pasting figures into a Word document. You can also use it as a notebook, in the same way as lab notebooks are used in a wet lab setting (or as we utilise Jupyter notebooks in the tutorial after this one). Quarto itself does not require any particular programming language to be installed - any language you want to use can be installed separately. The currently supported languages are R, Python, Julia and Observable. Quarto is fully compatible with both R Markdown and Jupyter documents.

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. Place yourself in the workshop-reproducible-research/tutorials/quarto/ directory, activate your quarto-env Conda environment and start your text editor or IDE of choice.

+
+
+
+ +
+
+A note on R Markdown +
+
+
+

Quarto is an evolution of the R Markdown format, which was previously used in this course. While R Markdown is a widely-used and excellent tool for code and reports, Quarto is most easily thought of as “R Markdown 2.0”. If you’re familiar with R Markdown, you will find Quarto to be highly similar. The creators of both Quarto and R Markdown (Posit) have stated that R Markdown is not going to be deprecated, but most newer features will only come to Quarto. This means that if you’ve used R Markdown in the past now is a good time to make the switch, but you don’t have to. You can check out the Quarto website for more in-depth discussions regarding Quarto/R Markdown (dis-)similarities.

+
+
+
+
+

2 The basics

+

Let’s start by creating a basic Quarto document that we can work with.

+
+

2.1 Creating Quarto documents

+

Quarto documents are just plain text files with the .qmd extension. Create a new file called e.g. quarto-tutorial.qmd and copy the following into it:

+
---
+title: "Untitled Quarto Document"
+author: "Jane Doe"
+format: html
+---
+

This is a so-called YAML header, which is where we specify the general settings of the document in the form of key: value. The title and author are just what they sound like, while the format field specifies what type of output you want the final report to be in (alternatives include pdf, revealjs and many others). Here we have specified that we want HTML output, which is perhaps the most useful for scientific computing.

+
    +
  • Change the title to My first Quarto document and the author to your name.
  • +
+

Let’s add some actual content to the document, starting with some basic markdown:

+
    +
  • Add some text into your Quarto document (including an empty line between the YAML header and the text), e.g. the following:
  • +
+
This is my first Quarto document!
+
+# This is a header
+
+This is where I'll soon add some *code* related to the first header.
+

Let’s see what this document looks like when it’s rendered into HTML by Quarto:

+
    +
  • Go to the command line and type quarto render quarto-tutorial.qmd.
  • +
+
+
+
+ +
+
+Rendering +
+
+
+

If you’re using e.g. RStudio or VSCode to edit your Quarto document you might have access to a render button, which means you don’t have to run the above command from the command line if you prefer.

+
+
+

Open your new quarto-tutorial.html file that was created and see what it looks like. It’s only markdown content so far, so let’s add some R code using a code chunk:

+
```{r}
+Sys.Date()
+```
+

Notice that we delimit the code chunk from the rest of the document’s contents using three backticks (```) and specify the R language using curly brackets ({r}). The code itself just prints the current date.

+
    +
  • Render the document again and see what it looks like.
  • +
+

You can also name chunks by adding the name after the language:

+
```{r date}
+Sys.Date()
+```
+

This is useful for debugging when something has gone wrong, since it’ll be easier to see exactly in which code chunk an error happened (instead of the chunk just being referred to by a number).

+

We can also get in-line code using {r} <R CODE>, like so:

+
The current date is `{r} Sys.Date()`.
+
    +
  • Add the example above and render the document again to make sure it worked.
  • +
+
+
+

2.2 Previewing documents

+

Quarto has a highly useful command for when you’re working on a document: preview. It’s essentially a live preview of the document you’re working on that will automatically render when you introduce changes to the document.

+
    +
  • Type quarto preview quarto-tutorial.qmd in the command line.
  • +
+

Your default web browser should now have opened a new window with your rendered document, while your command line should say something like the following:

+
Watching files for changes
+Browse at http://localhost:4175/
+

You can’t type new commands at the moment, because the Quarto Preview command is still running - it’s watching for any new changes to the Quarto document you specified.

+
    +
  • Change or add some markdown text to your Quarto document, e.g. This is a code chunk instead of the previous text under the first header. Make sure you save the document.
  • +
+

The HTML document in your browser should have updated to reflect your newest changes automatically. Previewing documents is great when you want to have continuous feedback to the changes you make and can make the process of writing more seamless, since you don’t have to manually render all the time. Previewing will still render the entire document, however, meaning that if you have some heavy computations you might not want to re-render on every single save. For those cases you might instead prefer to stick with manual rendering when you are satisfied with multiple changes. You can abort a preview like any on-going command, e.g. using Ctrl-C.

+

In the rest of the tutorial it’s up to you whether you want to use preview or not - the tutorial will just mention when it’s time to render, you decide how that’s done.

+
+
+

2.3 Rendering to PDF

+

So far we’ve only rendered to HTML, but sometimes you prefer a PDF. This entails changing the format option in the YAML header:

+
    +
  • Change the format to pdf in the header and render your document.
  • +
+

You can add any raw LaTeX commands you want to your document when you’re rendering to PDF, e.g. \footnotesize to change the font size. You also have LaTeX-specific settings, such as setting the geometry for the whole document or specifying a citation method. While the details of LaTeX are outside the scope of this course, it’s useful to be aware of this functionality of Quarto so that you may use it if you already know LaTeX or if you want to learn it.

+

Switch back to HTML rendering before you move on.

+
+
+

2.4 Languages

+

The examples so far have been using R, but we could just as easily have used Python. All we have to do is to change our code chunk to specify {python} as language and its content to be the equivalent Python code:

+
```{python}
+from datetime import date
+print(date.today())
+```
+
    +
  • Change the code chunk to the above Python chunk instead and render your document again.
  • +
+
+
+
+ +
+
+A note on Python in-line code +
+
+
+

Quarto support for in-line Python code was added in version 1.4, so if you’re using an older version of Quarto simply remove the in-line code example. You can check your Quarto version by running quarto --version on the command line. As of this writing, the 1.4 version of Quarto can be obtained from the pre-release page: https://quarto.org/docs/download/prerelease

+

If you’re using Quarto version 1.4 or higher and want to try the in-line code example above for Python, change the line to:

+
The current date is `{python} date.strftime(date.today(), format="%Y-%m-%d")`
+
+
+

So far we’ve had Quarto automatically determine which language engine should be used, which it detects through the code chunks we’ve written. We can also do this explicitly by adding engine: knitr or engine: jupyter to the YAML header.

+
    +
  • Explicitly add engine: jupyter to your YAML header and render the document.
  • +
+
+
+
+ +
+
+Making sure your Jupyter engine is recognised +
+
+
+

Quarto attempts to identify a suitable Jupyter engine for your system when you include Python code chunks. However, if you want to use Jupyter available in a specific conda environment (e.g. your quarto-env environment) you need to take some extra steps. Please visit this link and follow steps 1-4. In the final step, check for the name of the kernel matching your quarto-env conda environment, e.g.

+
$ jupyter kernelspec list
+Available kernels:
+...
+ conda-env-quarto-env-py    /Users/<your-user-name>/Library/Jupyter/kernels/conda-env-quarto-env-py
+

Using the example output from above we can add the following to the YAML header of our Quarto document:

+
jupyter:
+ kernelspec:
+   display_name: Python 3
+   language: python
+   name: conda-env-quarto-env-py
+
+
+

It can be useful to explicitly set the language for the document, as it makes it clearer from just the YAML header what language will be used. There are also more language-related options for Quarto, but we’ll save those for later in the tutorial.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section you learned how to create, edit and render basic Quarto documents using different languages.

+
+
+
+
+
+

3 Code chunks

+

Sometimes you want to add chunk options to the code chunks in your Quarto documents. They are also in YAML format and are prefixed with a special type of comment (#|). It can look something like this:

+
```{python}
+#| echo: false
+from datetime import date
+print(date.today())
+```
+
    +
  • Add the chunk option above to your document and render the document again.
  • +
+

Notice how we no longer see the code itself, just the output? This is because the echo option specifies just that: whether we see the code or not. There are a number of such chunk options that are useful to know about:

+ ++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Chunk optionEffect
echoInclude the chunk code in the output.
evalEvaluate the code chunk.
outputInclude the results of executing the code in the output.
warningInclude warnings in the output.
errorInclude errors in the output (note that this implies that errors executing code will not halt processing of the document).
includePrevent both code and output from being included.
+
    +
  • Check what happens if you change echo: false to eval: false.
  • +
+

Now the code in the code chunk is not run, which means that if you previously added the python inline code it will no longer work because it depends on date from the datetime module that we import in the code chunk. Remove the inline code snippet if you added it. Then try rendering again. Now you should see the code itself but it won’t be run and therefore has no output.

+
+

3.1 Figure options

+

There are also options related to figures, but for that we need to actually have some code that produces a figure.

+
    +
  • Change the YAML header to use R instead of Python, remove the Python code chunk and replace it with the following (don’t worry if you don’t understand the R code itself, it’s just an example):
  • +
+
```{r}
+library("ggplot2")
+library("palmerpenguins")
+data(penguins, package = "palmerpenguins")
+ggplot(penguins, aes(x      = bill_length_mm,
+                     y      = body_mass_g,
+                     colour = species)) +
+    geom_point(size = 2) +
+    theme_bw() +
+    labs(x      = "Bill length (mm)",
+         y      = "Body mass (g)",
+         colour = "Species") +
+    ggtitle("Penguin weight and bill length") +
+    theme(plot.title = element_text(hjust = 0.5)) +
+    scale_colour_manual(values = c("#c1dea0", "#85be42", "#425f21"))
+```
+

When you’ve rendered the document you should see both the code and a figure using the Palmer Penguins dataset. You should also see a warning along the lines of Removed 2 rows containing missing values.

+
    +
  • Suppress the warning by adding #| warning: false as a chunk option and render.
  • +
+

There are two chunk options related to figure sizes: fig-width and fig-height (expressed in inches). These allow you to experiment with your figures and make them look the way you want.

+
    +
  • Add both the fig-width: 10 and fig-height: 5 chunk options and render.
  • +
+
+
+
+ +
+
+Note +
+
+
+

These two chunk options are only available when using the Knitr engine, not for Jupyter. There is a way to set these for the whole document with Jupyter, though, which we’ll talk more about in the next section of the tutorial.

+
+
+

You can also add captions and alt text using fig-cap and fig-alt, respectively.

+
    +
  • Add a suitable caption and alt text to the figure and render.
  • +
+

If you want to place the caption in the margin of your document you can use the cap-location chunk option.

+
    +
  • Add cap-location: margin to your chunk options and render.
  • +
+
+
+
+ +
+
+Note +
+
+
+

On some Quarto versions the cap-location option may not work as expected. If you experience this, try also adding #| label: fig-penguins to the chunk.

+
+
+
+
+

3.2 Cross-references

+

A convenient way to be able to refer to figures in text is by adding a figure label, which will automatically add a figure number before your caption.

+
    +
  • Add a suitable label, e.g. label: fig-penguins to the chunk options.
  • +
+

Cross-references use the @ symbol and the corresponding label. You can thus write some markdown outside of a code chunk and refer to e.g. @fig-penguins, as per the example here. This is extremely useful if you’re writing a paper or a report where you want to refer to figures and content in the markdown text. Quarto even adds a clickable link to the figure itself as well!

+
+
+

3.3 Sub-figures

+

It’s also possible to create sub-figures using Quarto, instead of using whatever plotting library you created the figures with.

+
    +
  • Add the following (almost identical) code at the bottom of the chunk you already have:
  • +
+
ggplot(penguins, aes(x      = bill_depth_mm,
+                     y      = body_mass_g,
+                     colour = species)) +
+    geom_point(size = 2) +
+    theme_bw() +
+    labs(x      = "Bill depth (mm)",
+         y      = "Body mass (g)",
+         colour = "Species") +
+    scale_colour_manual(values = c("#c1dea0", "#85be42", "#425f21"))
+
    +
  • Also add the following to the chunk options:
  • +
+
#| fig-subcap:
+#|     - Bill length vs. body mass
+#|     - Bill depth vs. body mass
+

You should now see that we have two figures with separate sub-captions as well as the overall figure caption we previously added. We can also control the layout of these figures using the layout-ncol chunk option.

+
    +
  • Add a layout-ncol: 2 chunk option and render the document.
  • +
+

We now have a different, two-column layout instead, but whether you prefer this or just a one-column layout is up to you.

+
+
+

3.4 Tables

+

Tables work much in the same way as figures. It might, in our example, be nice to add a table with the data we previously plotted.

+
    +
  • Add the following code chunk to your document and render it:
  • +
+
```{r}
+#| label: tbl-penguins
+#| tbl-cap: Palmer penguins bill length, width and body mass.
+#| tbl-cap-location: margin
+knitr::kable(
+    penguins[1:10, c("species", "bill_length_mm", "bill_depth_mm", "body_mass_g")],
+    col.names = c("Species", "Bill length (mm)", "Bill depth (mm)", "Body mass (g)")
+)
+```
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section you learned several chunk, figure and table options, how cross-referencing works and how to add sub-figures.

+
+
+
+
+
+

4 Document options

+

So far we’ve mostly worked with chunk options, which are specific to the chunk they appear in. You can set many of these at the global document level, however, and there are also some options specifically for tailoring the document as a whole, regardless of chunk content.

+

We’ve already looked at some global options, such as title, author, format and engine. Something that would go nicely with the first two is the date option. You could just write the actual date if you like, or you can use the today option:

+
    +
  • Add the following to the options: date: today
  • +
+
+

4.1 Code folding

+

A useful option we haven’t touched already is the code-fold option. This and similar global options are specified nested inside the format option, like so:

+
format:
+    html:
+        code-fold: true
+
    +
  • Add the code-fold option to your document and render it.
  • +
+

This can be a nice default to use in scientific reports, as it hides the code by default but is always there for those who want to inspect it. You can also use the code-summary chunk option to specify a different text to show with the folded code instead of the default Code, e.g. code-summary: Click to show code.

+

If you want to add the code-summary option to all chunks you can add the following to the YAML header:

+
language:
+  code-summary: Click to show code
+

You can also add the code-tools option, which will add a drop-down menu to toggle visibility of all code as well as the ability to view the source of the document.

+
    +
  • Add the code-tools: true option and render the document.
  • +
+
+
+

4.2 Table of contents

+

Another useful document option is to add a table of contents, which can be done with the toc option. This will automatically populate the table of contents using the headers from your document.

+
    +
  • Add some more headings and/or sub-headings to your document.

  • +
  • Add the toc: true option to the html format and render.

  • +
+

The table of contents is to the right of the document by default, but you can change it using toc-location. The toc-depth allows you to control how many sub-heading levels are included in the table of contents.

+
    +
  • Add toc-location: left and toc-depth: 2 to your document and render it.
  • +
+

Having the table of contents on the left can be useful if you are using the margins for something, such as we are doing in this tutorial. You can similarly add section numbering using number-sections and number-depth. Smooth scrolling is not enabled by default, but you can add it using smooth-scroll: true. You can change the title of the table of contents using toc-title.

+
    +
  • Add section numbers, depth, smooth scrolling and a different table of contents title to your document and render it.
  • +
+
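Taken together, the table of contents-related part of the YAML header could end up looking something like this sketch (the values are arbitrary examples):

format:
    html:
        toc: true
        toc-location: left
        toc-depth: 2
        toc-title: Contents
        number-sections: true
        number-depth: 3
        smooth-scroll: true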
+
+

4.3 Themes

+

Quarto has a lot of themes available for it.

+
    +
  • Add theme: flatly under the HTML format option and render.
  • +
+

If you want to get real advanced you can play around with lots of details regarding the themes and adjust as you see fit, or even just create your own theme. This is a bit too advanced to go through here, but you can read about it more in the official documentation.

+
+
+

4.4 Global chunk options

+

The chunk options we learnt about in the previous section of this tutorial can also be specified on the global document level. Instead of specifying e.g. warning: false or fig-height: 5 in individual chunks we can add it to the main YAML header in the same manner as for e.g. code folding or table of contents. We’ll still have to specify options like labels or captions at the chunk-level, though.

+
    +
  • Add warning: false to your document header and remove it from the penguin figure chunk you already have.
  • +
+
+
+

4.5 Embedding HTML resources

+

When rendering HTML documents you get any figures and other resources in a <document-name>_files/ directory, which is not always desirable. It’s easier to move the HTML around if all figures etc. are embedded directly in the HTML itself, which can be done by specifying embed-resources: true in the HTML format options. This option is false by default, meaning that you’ll also have to include the previously mentioned directory if you want to share the HTML with anybody.

+
    +
  • Remove the <document-name>_files/ directory, refresh the rendered document and see what happens.

  • +
  • Add the embed-resources option and render your document again.

  • +
+

What happened first is that your figures should have disappeared when you deleted the resources directory. Embedding resources and rendering again should not re-create this directory, so now you’ll just have a stand-alone HTML file that is more portable than before.

+
+
+

4.6 Multiple formats

+

So far we’ve mostly been working with HTML output, but you don’t need to limit yourself to a single output format if you don’t want to.

+
    +
  • Add the docx: default line in the format: part of your YAML header and render your document.
  • +
+

You should have gotten two separate output files now: an HTML and a DOCX (Word) file. You can specify further options for any of the formats you include, instead of just using the default settings as in this example.

+
    +
  • Render your document again, but supply the --to html flag.
  • +
+

This will only render to the specified output format, which is highly useful when you want to write a Quarto document with more than one format but not always render them all.

+
+
+

4.7 Parameters

+

The last document-wide option we’ll touch on is parameters. This is useful for when you want to be able to run the same document with different parameters or options for some computations. How parameters are specified depends on which engine you’re using. With Knitr you can specify parameters using the params option:

+
    +
  • Add the following code to your YAML header:
  • +
+
params:
+    point_size: 2
+
    +
  • Also change the hard-coded geom_point(size = 2) to geom_point(size = params$point_size) in the two ggplot calls in the first code chunk.
  • +
+

We have thus specified a parameter called point_size in the YAML header and referred to it in the code using params$point_size. You can now change this parameter at run-time by supplying the -P <param>:<value> (or --execute-param) flag to quarto render.

+

Notice that this won’t work if you want to use a parameter to control e.g. a chunk option like layout-ncol. For this we need to use an in-line code expression: #| layout-ncol: !expr params$ncols.

+
    +
  • Add a parameter for the layout-ncol chunk option to the YAML header
  • +
  • Also add the layout-ncol chunk option to the figure chunk using the syntax above and render to make sure it works.
  • +
+

Note that to modify multiple parameters at run-time you have to use the -P param:value flag multiple times, like so:

+
quarto render quarto-tutorial.qmd -P point_size:4 -P ncols:1
+

If you’re using the Jupyter engine you can instead specify parameters by designating a single cell as a parameter cell, like so:

+
```{python}
+#| tags: [parameters]
+point_size = 2
+```
+

You can also specify parameters in a params.yml file and instruct quarto to use them with the --execute-params params.yml flag when rendering. Note that the parameters must be defined in the document (in the YAML header when using the knitr engine, or in a cell when using the jupyter engine). Pointing quarto to a params.yml file with --execute-params only overrides them when rendering.
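As a sketch, such a parameter file for the examples above and the corresponding render command could look like this (the file name and values are placeholders):

# params.yml
point_size: 4
ncols: 1

quarto render quarto-tutorial.qmd --execute-params params.yml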

+

Using parameters is extremely useful when you’re using a workflow manager system (e.g. Snakemake or Nextflow), since you can easily specify sample-specific parameters from the command line directly from your workflow manager.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this sections we covered a number of document-wide options, including code-folding, table of contents, theming, HTML portability, using multiple output formats and parameters.

+
+
+
+
+
+

5 Presentations

+

Quarto can also be used to create presentations in multiple formats such as reveal.js (HTML), beamer (PDF) and pptx (PowerPoint) - the most powerful of these formats by far is the first one. Creating presentations with Quarto is quite similar to creating general Quarto documents, with some added features to keep in mind.

+
+

5.1 Slides

+

The first thing that’s needed for creating a presentation is deciding what constitutes a slide. The default is that slides are delimited by a document’s header levels.

+
    +
  • Render your document using the --to revealjs flag and open it.
  • +
+

You should now have the same document we’ve been working on for this tutorial in presentation format! You can step through the slides using the arrow keys, press F to go into full-screen mode, S to view speaker notes, M for the menu (you can also click in the lower left corner to get this menu) and ESC to go back.

+

If you’ve followed along you should have one level-1 header (#) and two level-2 headers (##). Notice that the level-1 header here will render as a blank page with just the header content on it, while the level-2 headers will render as normal slide headers. This all looks quite nice, and we didn’t even have to change a thing! Disregard that the table on the last slide doesn’t fit for now, we’ll get back to it later. Another method of delimiting slides is using a horizontal rule, ---, which allows you more fine-grained control over slides and their content (and is especially useful if you want to have a slide without a title).
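For example, a title-less slide delimited by a horizontal rule could look like this:

## A slide with a title

Some content.

---

This slide has no title, only content.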

+
+
+

5.2 Divisions

+

There are many ways you can add presentation-specific content to your slides, some of which you’d recognise from e.g. PowerPoint functionality.

+

So called “divisions” or “divs” allow you to control the appearance of content in your slides.

+

Let’s fix that issue with the table that was larger than the page. The problem here is one of content overflow, which can be fixed by adding a special {.smaller} div.

+
+
+
+ +
+
+Note +
+
+
+

Divs do not work for level-1 headings (starting with a single #).

+
+
+
    +
  • Add the {.smaller} div to the table header (it should read something like ## A table {.smaller}) and render.
  • +
+

That should have automatically re-sized the table to fit into the slide. Another way to solve this is to make slide content scrollable.

+
    +
  • Change the {.smaller} div to a {.scrollable} div and render.
  • +
+

Instead of re-sizing the table we now get the ability to scroll down it instead; whichever solution you prefer is up to you.

+

Adding divisions of various types like this is a common thing for Quarto presentations. Another common presentation-functionality is incremental lists, which can also be achieved with divisions. When adding a division to slide content we specify the division’s content in a manner similar to a code chunk, like in the following example:

+
## Penguin species
+
+::: {.incremental}
+ - Adelie
+ - Chinstrap
+ - Gentoo
+:::
+
    +
  • Add the code above to your document and render it.
  • +
+

Stepping through incremental content works the same as for stepping through slides, i.e. using the arrow keys.

+
    +
  • Render your document to html instead of revealjs.
  • +
+

Notice that Quarto rendered the HTML document just fine, even though you now have some presentation-specific code? This allows you to switch between the formats on-demand without having much overhead or format-specific code, which is great when you want to present your work without having to whip out a full-fledged presentation and all the work that goes into that!

+

There are other useful divisions as well, including {.notes} (speaker notes), {.aside} (additional commentary similar to footnotes), {.footer} (slide footers), which you can add in the same way as we did for the incremental list above.
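For example, speaker notes could be added to a slide like this (the slide title and note text are just examples):

## About the data

::: {.notes}
Remember to mention where the penguin data come from.
:::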

+
    +
  • Pick one of the above-mentioned divisions to add to your presentation and render it.
  • +
+
+
+
+ +
+
+Note +
+
+
+

The notes and footer divisions will appear as normal Markdown text when rendering to HTML, while asides will appear in the margin. These divisions thus represent cases you might want to avoid if you want to be completely format-agnostic.

+
+
+
+
+

5.3 Presentation options

+

Just like the other formats you can specify presentation-specific options at the document-level using the YAML header. You could, for example, add the {.scrollable} or {.smaller} div to the entire document.

+
    +
  • Add the revealjs format to the YAML header as well as a scrollable: true option to it.
  • +
+

You can also specify one of the built-in themes here.

+
    +
  • Add theme: simple to your YAML header and render.
  • +
+

You can find the entire list of themes at the Quarto website.

+
+
+

5.4 Multiple columns

+

Sometimes you’ll want to have more than one column in your presentation, which is done with the {.columns} and {.column} divisions. The former specifies that a section with multiple columns is starting, while the second specifies when each column starts, like so:

+
:::: {.columns}
+
+::: {.column}
+Left column
+:::
+
+::: {.column}
+Right column
+:::
+
+::::
+
    +
  • Add multiple columns with some content to your presentation and render it.
  • +
+

You can also control the widths of these columns using e.g. {.column width="40%"}.

+
+
+
+ +
+
+Note +
+
+
+

The {.columns} div also works for a normal HTML render, so it’ll look the same regardless of whether you output as a document or a presentation.

+
+
+
+
+

5.5 Fragments

+

We’ve already learnt how to get incremental lists working, but what about general content we want to incrementally step through? This is done with the {.fragment} div.

+
    +
  • Add a {.fragment} div to some slide content and render.
  • +
+

Fragments are similar to “animations” from PowerPoint and come with lots of built-in variations, e.g. fade-out, grow, strike and several others.

+
    +
  • Add a fragment variant to your content, e.g. {.fragment .grow} and render your document.
  • +
+

You can also control the order in which fragments appear using the fragment-index=<NUMBER> option.

+
    +
  • Create a new slide and add some content with a different order of appearance than the order of the code. If you need help or inspiration, click below.
  • +
+
+ +
+
+
## Why Palmer Penguins?
+
+::: {.fragment fragment-index=2}
+![](https://allisonhorst.github.io/palmerpenguins/logo.png){fig-align="center"}
+:::
+
+::: {.fragment fragment-index=1}
+The goal of `palmerpenguins` is to provide a good dataset for data exploration
+and visualization, as an alternative to `iris`.
+:::
+
+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we covered how to create presentations using Quarto, including how to add various divisions, global slide-options, multiple columns and fragments.

+
+
+
+
+
+

6 Extra material

+

The following material contains some more advanced things that you can do with Quarto but are not really part of the core of the Quarto material. It’s a mix of various functionalities, and you don’t have to go through it if you don’t want to.

+

If you’re interested in learning more about Quarto in general, here are some reading tips:

+ +
+

6.1 Tabsets

+

Sometimes you’ll want to present the same content in different ways, e.g. the equivalent code in different languages. Look at the following toy example:

+
::: {.panel-tabset}
+## R
+```{r}
+words <- c("Foo", "bar")
+print(paste(words, collapse = ' '))
+```
+
+## Python
+```{python}
+words = ["Foo", "bar"]
+print(' '.join(words))
+```
+:::
+

Try adding that to a document and see that you’ll get a set of tabs that change the content of the code chunk to the respective language. This is not only useful for showing different languages, but can be used for other situations as well. For example, you might want to run different analyses and show them in different tabs, or even show different interactive elements in separate tabs.

+
+
+

6.2 Callouts

+

If you’re writing some sort of documentation, tutorial or just want to draw special attention to something, callouts are here for you. They render as a coloured block with a header and content. There are five types of callouts: note, tip, warning, caution, and important. As with lots of Quarto things they are specified using a division, like so:

+
::: {.callout-note}
+This is a note callout.
+:::
+

The different callouts come with appropriate colours by default, which you can change in the theme. You can also have collapsible callouts by adding the collapse=true option, where true will have the callout collapsed by default. You can also specify titles in the same way using the title=<TITLE> option or by adding the title directly to the callout content, like so:

+
::: {.callout-note}
+## This is the callout title
+
+This is a note callout.
+:::
+

You can change the overall appearance of callouts by using the appearance option or the callout-appearance global option. Valid values are default, simple and minimal, with decreasing usage of colours and weights. You can also suppress the callout icons using icon=false or callout-icon: false in a similar manner.
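Here is a small sketch combining some of these options:

::: {.callout-tip collapse="true" appearance="simple" icon=false}
## A collapsed tip with a custom title

This content is hidden until the reader expands the callout.
:::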

+
+
+

6.3 Mixing R and Python

+

Earlier in the tutorial we showed how to change the language using the engine global option, but there is actually a way to use both R and Python in the same Quarto document. This is done via the Knitr engine and the reticulate R package, which allows communication between any variables and data you store in either R or Python code chunks. While this may not be that common of a use-case, it’s still great that it’s there for those that want access to it. We won’t go through the details of how this works here, but you’re welcome to go and check out the official reticulate website for yourself.

+

If you just want to mix R and Python in a single Quarto document without the interoperability between the languages it’s a lot simpler, though. You can either just install the reticulate package (r-reticulate in Conda) or add the python.reticulate=FALSE chunk option to the Python chunks.

+
+
+

6.4 Citations

+

You can actually write whole articles in Quarto! For that purpose, it’s also great that you can cite things from a bibliography as well. Specifying the bibliography file(s) is done using the bibliography global option; specifying the citation style can be done using a csl (Citation Style Language) file and the csl global option. Citation itself is similar to cross-referencing (@cross-ref), but is surrounded by square brackets: [@citation]. You can read more details about citations at the Quarto website.
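A minimal sketch of what this could look like, where the bibliography file, CSL file and citation key are all placeholders:

---
title: "My article"
bibliography: references.bib
csl: nature.csl
---

As shown previously [@doe2020], reproducibility matters.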

+ + +
+
+ +
+ +
+ + + + + + + \ No newline at end of file diff --git a/pages/snakemake.html b/pages/snakemake.html new file mode 100644 index 00000000..c83b9278 --- /dev/null +++ b/pages/snakemake.html @@ -0,0 +1,2052 @@ + + + + + + + + + +Working with Snakemake + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+
+ +
+ +
+
+
+

Working with Snakemake

+

How to create reproducible workflows and computational pipelines

+
+
+ + +
+ + +
+
Published
+
+

15-Oct-2024

+
+
+ + +
+ + +
+ + + + +
+ + + + + + +
+

1 Introduction

+

A workflow management system (WfMS) is a piece of software that sets up, performs and monitors a defined sequence of computational tasks (i.e. “a workflow”). Snakemake is a WfMS that was developed in the bioinformatics community, and as such it has a number of features that make it particularly well-suited for creating reproducible and scalable data analyses.

+

First of all the language you use to formulate your workflows is based on Python, which is a language with strong standing in academia. However, users are not required to know how to code in Python to work efficiently with Snakemake. Workflows can easily be scaled from your desktop to server, cluster, grid or cloud environments. This makes it possible to develop a workflow on your laptop, maybe using only a small subset of your data, and then run the real analysis on a cluster. Snakemake also has several features for defining the environment with which each task is carried out. This is important in bioinformatics, where workflows often involve running a large number of small third-party tools.

+

Snakemake is primarily intended to work on files (rather than for example streams, reading/writing from databases or passing variables in memory). This fits well with many fields of bioinformatics, notably next-generation sequencing, that often involve computationally expensive operations on large files. It’s also a good fit for a scientific research setting, where the exact specifications of the final workflow aren’t always known at the beginning of a project.

+

Lastly, a WfMS is a very important tool for making your analyses reproducible. By keeping track of when each file was generated, and by which operation, it is possible to ensure that there is a consistent “paper trail” from raw data to final results. Snakemake also has features that allow you to package and distribute the workflow, and any files it involves, once it’s done.

+

This tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/snakemake and activate your snakemake-env Conda environment.

+
+
+

2 The basics

+

In this part of the tutorial we will create a very simple workflow from scratch, in order to show the fundamentals of how Snakemake works. The workflow will take two files as inputs, a.txt and b.txt, and the purpose is to convert the text in the files to upper case and then to concatenate them.

+

Run the following shell commands. The first one will make an empty file named Snakefile, which will later contain the workflow. The second and third commands generate two files containing some arbitrary text.

+
touch Snakefile
+echo "This is a.txt" > a.txt
+echo "This is b.txt" > b.txt
+

Then open Snakefile in your favourite text editor. A Snakemake workflow is based on rules, which take some file(s) as input, perform some type of operation on them, and generate some file(s) as output. Here is a very simple rule that produces a.upper.txt as an output, using a.txt as input. Copy this rule to your Snakefile and save it.

+
rule convert_to_upper_case:
+    output:
+        "a.upper.txt"
+    input:
+        "a.txt"
+    shell:
+        """
+        tr [a-z] [A-Z] < {input} > {output}
+        """
+
+
+
+ +
+
+Caution +
+
+
+

Indentation is important in Snakefiles, so make sure that you have the correct number of spaces before input/output/shell and their respective subsections. The number of spaces per level doesn’t matter as long as you’re consistent. Here we use four, but you could just as well use two for a more compact look. Don’t use tabs (unless your editor automatically converts them to spaces).

+
+
+

Rules can be given names; here it’s convert_to_upper_case. While rule names are not strictly necessary we encourage you to use them and to make an effort to name your rules in a way that makes it easy to understand the purpose of the rule, as rule names are one of the main ways to interact with the workflow. The shell section (or directive) contains the shell commands that will convert the text in the input file to upper case and send it to the output file. In the shell command string, we can refer to elements of the rule via curly brackets. Here, we refer to the output file by specifying {output} and to the input file by specifying {input}. If you’re not very familiar with Bash, this particular command can be read as “send the contents of a.txt to the program tr, which will convert all characters in the set [a-z] to the corresponding character in the set [A-Z], and then send the output to a.upper.txt”.

+

Now let’s run our first Snakemake workflow. When a workflow is executed Snakemake tries to generate a set of target files. Target files can be specified via the command line (or, as you will see later, in several other ways). Here we ask Snakemake to make the file a.upper.txt. We can specify the file containing our rules with -s but since the default behaviour of Snakemake is to look for a file called Snakefile in either the working directory or in a subdirectory called workflow/ we don’t need to specify that here. It’s good practice to first run with the flag -n (or --dry-run), which will show what Snakemake plans to do without actually running anything, and you also need to specify how many cores to be used for the workflow with --cores or -c. For now, you only need 1 so set -c 1. You can also use the flag -p, for showing the shell commands that it will execute, and the flag -r for showing the reason for running a specific rule. snakemake --help will show you all available flags.

+
$ snakemake -n -c 1 -r -p a.upper.txt
+
+Building DAG of jobs...
+Job stats:
+job                      count    min threads    max threads
+---------------------  -------  -------------  -------------
+convert_to_upper_case        1              1              1
+total                        1              1              1
+
+
+[Mon Oct 25 16:48:43 2021]
+rule convert_to_upper_case:
+    input: a.txt
+    output: a.upper.txt
+    jobid: 0
+    reason: Missing output files: a.upper.txt
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+
+        tr [a-z] [A-Z] < a.txt > a.upper.txt
+
+Job stats:
+job                      count    min threads    max threads
+---------------------  -------  -------------  -------------
+convert_to_upper_case        1              1              1
+total                        1              1              1
+
+This was a dry-run (flag -n). The order of jobs does not reflect the order of execution.
+

You can see that Snakemake plans to run one job: the rule convert_to_upper_case with a.txt as input and a.upper.txt as output. The reason for doing this is that it’s missing the file a.upper.txt. Now execute the workflow without the -n flag and check that the contents of a.upper.txt is as expected. Then try running the same command again. What do you see? It turns out that Snakemake only reruns jobs if there have been changes to either the input files, or the workflow itself. This is how Snakemake ensures that everything in the workflow is up to date. We will get back to this shortly.

+

What if we ask Snakemake to generate the file b.upper.txt?

+
$ snakemake -n -c 1 -r -p b.upper.txt
+
+Building DAG of jobs...
+MissingRuleException:
+No rule to produce b.upper.txt (if you use input functions make sure that they don't raise unexpected exceptions).
+

That didn’t work well. We could copy the rule to make a similar one for b.txt, but that would be a bit cumbersome. This is where named wildcards come in: one of the most powerful features of Snakemake. Simply change the input from input: "a.txt" to input: "{some_name}.txt" and the output to output: "{some_name}.upper.txt". Now try asking for b.upper.txt again.
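For reference, the modified rule would then look something like this:

rule convert_to_upper_case:
    output:
        "{some_name}.upper.txt"
    input:
        "{some_name}.txt"
    shell:
        """
        tr [a-z] [A-Z] < {input} > {output}
        """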

+

Tada! What happens here is that Snakemake looks at all the rules it has available (actually only one in this case) and tries to assign values to all wildcards so that the targeted files can be generated. In this case it was quite simple (you can see that it says wildcards: some_name=b), but for large workflows and multiple wildcards it can get much more complex. Named wildcards are what enable a workflow (or single rules) to be efficiently generalized and reused between projects or shared between people.

+

It seems we have the first part of our workflow working; now it’s time to make the second rule for concatenating the outputs from convert_to_upper_case. The rule structure will be similar; the only difference is that here we have two inputs instead of one. This can be expressed in two ways, either with named inputs like this:

+
input:
+    firstFile="...",
+    secondFile="..."
+shell:
+    """
+    some_function {input.firstFile} {input.secondFile}
+    """
+

Or with indexes like this:

+
input:
+    "...",
+    "..."
+shell:
+    """
+    some_function {input[0]} {input[1]}
+    """
+
+
+
+ +
+
+Caution +
+
+
+

If you have multiple inputs or outputs they need to be delimited with a comma (as seen above). This is a very common mistake when writing Snakemake workflows. The parser will complain, but sometimes the error message can be difficult to interpret.

+
+
+

Now try to construct this rule yourself and name it concatenate_a_and_b. The syntax for concatenating two files in Bash is cat first_file.txt second_file.txt > output_file.txt. Call the output c.txt. Run the workflow in Snakemake and validate that the output looks as expected.

+

Wouldn’t it be nice if our workflow could be used for any files, not just a.txt and b.txt? We can achieve this by using named wildcards (or in other ways as we will discuss later). As we’ve mentioned, Snakemake looks at all the rules it has available and tries to assign values to all wildcards so that the targeted files can be generated. We therefore have to name the output file in a way that also contains information about which input files it should be based on. Try to figure out how to do this yourself. If you’re stuck you can look at the spoiler below, but spend some time on it before you look. Also rename the rule to concatenate_files to reflect its new, more general use.

+
+ +
+
+
rule concatenate_files:
+    output:
+        "{first}_{second}.txt"
+    input:
+        "{first}.upper.txt",
+        "{second}.upper.txt"
+    shell:
+        """
+        cat {input[0]} {input[1]} > {output}
+        """
+
+
+
+

We can now control which input files to use by the name of the file we ask Snakemake to generate. Run the workflow without the flag -n (or --dry-run) to execute both rules, providing one core with -c 1 (or --cores 1):

+
$ snakemake a_b.txt -c 1
+
+Building DAG of jobs...
+Using shell: /bin/bash
+Provided cores: 1 (use --cores to define parallelism)
+Rules claiming more threads will be scaled down.
+Job stats:
+job                      count    min threads    max threads
+---------------------  -------  -------------  -------------
+concatenate_files            1              1              1
+convert_to_upper_case        2              1              1
+total                        3              1              1
+
+Select jobs to execute...
+
+[Mon Oct 25 16:51:52 2021]
+rule convert_to_upper_case:
+    input: b.txt
+    output: b.upper.txt
+    jobid: 2
+    wildcards: some_name=b
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+[Mon Oct 25 16:51:53 2021]
+Finished job 2.
+1 of 3 steps (33%) done
+Select jobs to execute...
+
+[Mon Oct 25 16:51:53 2021]
+rule convert_to_upper_case:
+    input: a.txt
+    output: a.upper.txt
+    jobid: 1
+    wildcards: some_name=a
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+[Mon Oct 25 16:51:53 2021]
+Finished job 1.
+2 of 3 steps (67%) done
+Select jobs to execute...
+
+[Mon Oct 25 16:51:53 2021]
+rule concatenate_files:
+    input: a.upper.txt, b.upper.txt
+    output: a_b.txt
+    jobid: 0
+    wildcards: first=a, second=b
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+[Mon Oct 25 16:51:53 2021]
+Finished job 0.
+3 of 3 steps (100%) done
+

Neat!

+
+
+
+ +
+
+Tip +
+
+
+

You can name a file whatever you want in a Snakemake workflow, but you will find that everything falls into place much more nicely if the filename reflects the file’s path through the workflow, e.g. sample_a.trimmed.deduplicated.sorted.bam.

+
+
+

The input to Snakemake rules has to be strings or lists of strings; however, you don’t have to specify these strings directly in the input: section of rules. Instead, you can specify Python functions that return strings or lists of strings. This allows you to supply input to rules that can vary depending on the wildcards being used. We’ll get to why that’s useful in a bit, but first let’s put it to use for the concatenate_files rule. Because Snakemake is based on Python we can mix rule definitions with standard Python code in the same file. Add a function just above the concatenate_files rule that looks like this:

+
def concat_input(wildcards):
+    files = [wildcards.first + ".upper.txt", wildcards.second + ".upper.txt"]
+    return files
+

This is the syntax to define a function in Python. The def concat_input(wildcards): line shows the name of the function (concat_input) and the variable passed to the function (the wildcards object). In the second line we create a list called files with two items, appending the ‘.upper.txt’ suffix to the value of each wildcard. Finally, the function returns the list. Because the concatenate_files rule has two wildcards {first} and {second} we can access the actual strings in the wildcards object using wildcards.first and wildcards.second. When we ask for the file a_b.txt then wildcards.first == 'a' and wildcards.second == 'b'. This means that the files list returned by the function will be ['a.upper.txt', 'b.upper.txt']. To see for yourself you can add the following line to the function, just before the return statement: print(wildcards.first, wildcards.second, files). This way the wildcard values and the list will be printed to the terminal when you run Snakemake.
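With the suggested print statement added, the function would look like this:

def concat_input(wildcards):
    files = [wildcards.first + ".upper.txt", wildcards.second + ".upper.txt"]
    # Print the wildcard values and the resulting list when the DAG is built
    print(wildcards.first, wildcards.second, files)
    return files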

+

Now that we’ve defined the function to use as input, we can use it in the concatenate_files rule. Update the rule so that it looks like this:

+
rule concatenate_files:
+    output:
+        "{first}_{second}.txt"
+    input:
+        concat_input
+    shell:
+        """
+        cat {input[0]} {input[1]} > {output}
+        """
+

You see that the name of the function concat_input is added in place of the input strings. When using the wildcards object in input functions like this we have to call the function without any arguments (simply concat_input) and the function has to be defined to accept a single argument (here def concat_input(wildcards):). Let’s run the workflow with the updated rule. Remove the file a_b.txt or add -f to the Snakemake command to force a re-run:

+
snakemake a_b.txt -c 1 -f
+

If you added the print statement to the function you should see the following printed to your terminal:

+
Building DAG of jobs...
+a b ['a.upper.txt', 'b.upper.txt']
+

Followed by the rest of the workflow output.

+

There are a number of possible use-cases for input functions. For example, say that you have an experiment where you’ve sequenced three samples: sample1, sample2 and sample3, with the corresponding FASTQ files under data/, and you want to write a rule that outputs the statistics of all sequences within each sample. However, samples sample1 and sample2 have been sequenced with single-end technology while sample3 has paired-end reads. The single-end samples will have only one FASTQ file whereas the paired-end sample will have two (one for each sequenced end). Thus, depending on the name of the sample, the input to the rule will be either one file or two. With input functions we can write a generalized rule that can handle both types:

+
def fastq_input(wildcards):
+    if wildcards.sample_id in ["sample1", "sample2"]:
+        return "data/" + wildcards.sample_id + ".fastq.gz"
+    else:
+        return ["data/" + wildcards.sample_id + ".R1.fastq.gz",
+                "data/" + wildcards.sample_id + ".R2.fastq.gz"]
+
+rule fastq_stats:
+    output:
+        "{sample_id}.stats.txt"
+    input:
+        fastq_input
+    shell:
+        """
+        seqtk comp {input} > {output}
+        """
+

As you can see, the fastq_stats rule outputs one file {sample_id}.stats.txt and takes as input the value returned from the fastq_input function. In this function the sample id is evaluated and if it is either sample1 or sample2 (our single-end samples) then the function returns a single string which is the path to the FASTQ file for that sample. Otherwise, the function returns a list containing both the R1 and R2 files for the sample. In the shell: directive of the rule the seqtk comp command is run on the input and the output is sent to the output file.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How a simple Snakemake rule looks.
  • +
  • How to define target files when executing a workflow.
  • +
  • How to use named wildcards for writing generic and flexible rules.
  • +
  • How to use input functions in rules
  • +
+
+
+
+
+

3 Visualising workflows

+

All that we’ve done so far could quite easily be done in a simple shell script that takes the input files as parameters. Let’s now take a look at some of the features where a WfMS like Snakemake really adds value compared to a more straightforward approach. One such feature is the possibility to visualize your workflow. Snakemake can generate three types of graphs, one that shows how the rules are connected, one that shows how the jobs (i.e. an execution of a rule with some given inputs/outputs/settings) are connected, and finally one that shows rules with their respective input/output files.

+

First we look at the rule graph. The following command will generate a rule graph in the dot language and pipe it to the program dot, which in turn will save a visualization of the graph as a PNG file (if you’re having trouble displaying PNG files you could use SVG or JPG instead).

+
+
+
+ +
+
+Caution +
+
+
+

If you added the print(wildcards.first,wildcards.second,files) statement to the concat_input function in the previous section you need to remove that line before running the commands below.

+
+
+
snakemake --rulegraph a_b.txt | dot -Tpng > rulegraph.png
+

+

This looks simple enough, the output from the rule convert_to_upper_case will be used as input to the rule concatenate_files.

+

For a more typical bioinformatics project it can look something like this when you include all the rules from processing of the raw data to generating figures for the paper.

+

+

While saying that it’s easy to read might be a bit of a stretch, it definitely gives you a better overview of the project than you would have without a WfMS.

+

The second type of graph is based on the jobs, and looks like this for our little workflow (use --dag instead of --rulegraph).

+
snakemake --dag a_b.txt | dot -Tpng > jobgraph.png
+

+

The main difference here is that now each node is a job instead of a rule. You can see that the wildcards used in each job are also displayed. Another difference is the dotted lines around the nodes. A dotted line is Snakemake’s way of indicating that this rule doesn’t need to be rerun in order to generate a_b.txt. Validate this by running snakemake -n -r a_b.txt and it should say that there is nothing to be done.

+

We’ve discussed before that one of the main purposes of using a WfMS is that it automatically makes sure that everything is up to date. This is done by recursively checking that outputs are always newer than inputs for all the rules involved in the generation of your target files. Now try to change the contents of a.txt to some other text and save it. What do you think will happen if you run snakemake -n -r a_b.txt again?

+
+ +
+
+
$ snakemake -n -r a_b.txt
+
+Building DAG of jobs...
+Job stats:
+job                      count    min threads    max threads
+---------------------  -------  -------------  -------------
+concatenate_files            1              1              1
+convert_to_upper_case        1              1              1
+total                        2              1              1
+
+
+[Mon Oct 25 17:00:02 2021]
+rule convert_to_upper_case:
+    input: a.txt
+    output: a.upper.txt
+    jobid: 1
+    reason: Updated input files: a.txt
+    wildcards: some_name=a
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+
+[Mon Oct 25 17:00:02 2021]
+rule concatenate_files:
+    input: a.upper.txt, b.upper.txt
+    output: a_b.txt
+    jobid: 0
+    reason: Input files updated by another job: a.upper.txt
+    wildcards: first=a, second=b
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+Job stats:
+job                      count    min threads    max threads
+---------------------  -------  -------------  -------------
+concatenate_files            1              1              1
+convert_to_upper_case        1              1              1
+total                        2              1              1
+
+This was a dry-run (flag -n). The order of jobs does not reflect the order of execution.
+
+
+
+

Were you correct? Also generate the job graph and compare to the one generated above. What’s the difference? Now rerun without -n and validate that a_b.txt contains the new text (don’t forget to specify -c 1). Note that Snakemake doesn’t look at the contents of files when trying to determine what has changed, only at the timestamp for when they were last modified.
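You can convince yourself of this by updating only the timestamp of a.txt, without changing its contents, and doing another dry-run:

touch a.txt
snakemake -n -r a_b.txt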

+

We’ve seen that Snakemake keeps track of whether files in the workflow have changed, and automatically makes sure that any results depending on such files are regenerated. What if the rules themselves are changed? It turns out that since version 7.8.0 Snakemake keeps track of this automatically.

+

Let’s say that we want to modify the rule concatenate_files to also include which files were concatenated.

+
rule concatenate_files:
+    output:
+        "{first}_{second}.txt"
+    input:
+        "{first}.upper.txt",
+        "{second}.upper.txt"
+    shell:
+        """
+        echo 'Concatenating {input}' | cat - {input[0]} {input[1]} > {output}
+        """
+
+
+
+ +
+
+Note +
+
+
+

It’s not really important for the tutorial, but the shell command used here first outputs “Concatenating” followed by a space delimited list of the files in input. This string is then sent to the program cat where it’s concatenated with input[0] and input[1] (the parameter - means that it should read from standard input). Lastly, the output from cat is sent to {output}.

+
+
+

If you now run the workflow as before you should see:

+
rule concatenate_files:
+    input: a.upper.txt, b.upper.txt
+    output: a_b.txt
+    jobid: 0
+    reason: Code has changed since last execution
+    wildcards: first=a, second=b
+

This is because, although no files involved in the workflow have changed, Snakemake recognizes that the workflow code itself has been modified, and this triggers a re-run.

+

Snakemake is aware of changes to four categories of such “rerun-triggers”: “input” (changes to rule input files), “params” (changes to the rule params section), “software-env” (changes to Conda environment files specified by the conda: directive) and “code” (changes to code in the shell:, run:, script: and notebook: directives).

+

Prior to version 7.8.0, only changes to the modification time of input files would trigger automatic re-runs. To run Snakemake with this previous behaviour you can use the setting --rerun-triggers mtime at the command line. Change the shell: section of the concatenate_files rule back to the previous version, then try running: snakemake -n -r a_b.txt --rerun-triggers mtime and you should again see Nothing to be done (all requested files are present and up to date).

+

You can also export information on how all files were generated (when, by which rule, which version of the rule, and by which commands) to a tab-delimited file like this:

+
snakemake a_b.txt -c 1 -D > summary.tsv
+

The content of summary.tsv is shown in the table below:

+
| output_file | date | rule | version | log-file(s) | input-file(s) | shellcmd | status | plan |
|---|---|---|---|---|---|---|---|---|
| a_b.txt | Mon Oct 25 17:01:46 2021 | concatenate_files | - | | a.upper.txt,b.upper.txt | cat a.upper.txt b.upper.txt > a_b.txt | rule implementation changed | update pending |
| a.upper.txt | Mon Oct 25 17:01:46 2021 | convert_to_upper_case | - | | a.txt | tr [a-z] [A-Z] < a.txt > a.upper.txt | ok | no update |
| b.upper.txt | Mon Oct 25 17:01:46 2021 | convert_to_upper_case | - | | b.txt | tr [a-z] [A-Z] < b.txt > b.upper.txt | ok | no update |
+
+

You can see in the second-to-last column that the rule implementation for a_b.txt has changed. The last column shows whether Snakemake plans to regenerate the files when it’s next executed. You can see that for the concatenate_files rule the plan is update pending, because we generated the summary with the default behaviour of using all rerun-triggers.

+

You might wonder where Snakemake keeps track of all these things. It stores all this information in a hidden subdirectory called .snakemake. This is convenient since it’s easy to delete if you don’t need it anymore, and everything is contained in the project directory. Just be sure to add it to .gitignore so that you don’t end up tracking it with Git.
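One simple way to do that is to append it to your .gitignore from the command line:

echo ".snakemake/" >> .gitignore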

+

By now you should be familiar with the basic functionality of Snakemake, and you can build advanced workflows with only the features we have discussed here. There’s a lot we haven’t covered though, in particular when it comes to making your workflow more reusable. In the following section we will start with a workflow that is fully functional but not very flexible. We will then gradually improve it, and at the same time showcase some Snakemake features we haven’t discussed yet. Note that this can get a little complex at times, so if you felt that this section was a struggle then you could move on to one of the other tutorials instead.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use --dag and --rulegraph for visualizing the job and rule graphs, respectively.
  • +
  • How Snakemake reruns relevant parts of the workflow after there have been changes.
  • +
  • How Snakemake tracks changes to files and code in a workflow
  • +
+
+
+
+
+

4 The MRSA workflow

+

As you might remember from the intro, we are attempting to understand how lytic bacteriophages can be used as a future therapy for the multi-resistant bacteria MRSA (methicillin-resistant Staphylococcus aureus). In order to do this we have performed RNA-seq of three strains, one test and two controls. We have already set up a draft Snakemake workflow for the RNA-seq analysis and it seems to be running nicely. The rest of the Snakemake tutorial will be spent improving and making this workflow more flexible!

+
+
+
+ +
+
+Tip +
+
+
+

This section will leave a little more up to you compared to the previous one. If you get stuck at some point the final workflow after all the modifications is available in tutorials/git/Snakefile.

+
+
+

You are probably already in your snakemake-env environment, otherwise activate it (use conda info --envs if you are unsure).

+
+
+
+ +
+
+Tip +
+
+
+

Here we have one Conda environment for executing the whole Snakemake workflow. Snakemake also supports using explicit Conda environments on a per-rule basis, by specifying something like conda: rule-specific-env.yml in the rule definition and running Snakemake with the --use-conda flag. The given rule will then be run in the Conda environment specified in rule-specific-env.yml, which will be created and activated on the fly by Snakemake. Note that by default Snakemake uses mamba to generate the rule-specific environments. This behaviour can be changed by running with --conda-frontend conda, which will force Snakemake to use conda instead. A minimal sketch of a rule with its own Conda environment is shown below this box.

+
+
+
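As a minimal sketch (with placeholder names, not a rule from this workflow), a rule using its own Conda environment could look like this, and the workflow would then be run with snakemake --use-conda:

rule some_rule:
    output:
        "..."
    input:
        "..."
    conda:
        "rule-specific-env.yml"
    shell:
        """
        some_program {input} > {output}
        """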

Let’s start by generating the rule graph so that we get an overview of the workflow. Here we have to specify the file with the rules using the -s flag to Snakemake since the path to the file differs from the default.

+
snakemake -s snakefile_mrsa.smk --rulegraph | dot -T png > rulegraph_mrsa.png
+

There’s another difference in this command compared to the one we’ve used before, namely that we don’t define a target. In the toy example we used a_b.txt as a target, and the wildcards were resolved based on that. How come we don’t need to do that here? It turns out that by default Snakemake targets the first rule in a workflow. By convention, we call this rule all and let it serve as a rule for aggregating the main outputs of the workflow.
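In the MRSA workflow, the all rule only has an input: directive listing the main outputs, roughly like this:

rule all:
    input:
        "results/tables/counts.tsv",
        "results/multiqc/multiqc.html",
        "results/rulegraph.png"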

+

+

Now take some time and look through the workflow file and try to understand how the rules fit together. Use the rule graph as aid. The rules represent a quite standard, although somewhat simplified, workflow for RNA-seq analysis. If you are unfamiliar with the purpose of the different operations (index genome, FastQC and so on), then take a look at the intro.

+

Also generate the job graph in the same manner. Here you can see that three samples will be downloaded: SRR935090, SRR935091, and SRR935092. The original sample files contain tens of millions of reads but for the purpose of this course we have sub-sampled them to 100,000 reads per sample, so that they are easy to manage, and made them available at the SciLifeLab Data Repository. These FASTQ files will then be quality controlled with FastQC and aligned to a genome. The QC output will be aggregated with MultiQC and the alignments will be used to generate a count table, i.e. a table that shows how many reads map to each gene for each sample. This count table is then what the downstream analysis will be based on.

+

+

Now try to run the whole workflow. Hopefully you see something like this.

+
Building DAG of jobs...
+Using shell: /bin/bash
+Provided cores: 1 (use --cores to define parallelism)
+Rules claiming more threads will be scaled down.
+Job stats:
+job                     count    min threads    max threads
+--------------------  -------  -------------  -------------
+align_to_genome             3              1              1
+all                         1              1              1
+fastqc                      3              1              1
+generate_count_table        1              1              1
+generate_rulegraph          1              1              1
+get_SRA_by_accession        3              1              1
+get_genome_fasta            1              1              1
+get_genome_gff3             1              1              1
+index_genome                1              1              1
+multiqc                     1              1              1
+sort_bam                    3              1              1
+total                      19              1              1
+
+Select jobs to execute...
+
+[Mon Oct 25 17:13:47 2021]
+rule get_genome_fasta:
+    output: data/ref/NCTC8325.fa.gz
+    jobid: 6
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+--2021-10-25 17:13:48--  ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz
+           => ‘data/ref/NCTC8325.fa.gz’
+Resolving ftp.ensemblgenomes.org (ftp.ensemblgenomes.org)... 193.62.197.75
+Connecting to ftp.ensemblgenomes.org (ftp.ensemblgenomes.org)|193.62.197.75|:21... connected.
+Logging in as anonymous ... Logged in!
+==> SYST ... done.    ==> PWD ... done.
+.
+.
+[lots of stuff]
+.
+.
+localrule all:
+    input: results/tables/counts.tsv, results/multiqc/multiqc.html, results/rulegraph.png
+    jobid: 0
+    resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T
+
+[Mon Oct 25 17:14:38 2021]
+Finished job 0.
+19 of 19 steps (100%) done
+

After everything is done, the workflow will have resulted in a bunch of files in the directories data/ and results/. Take some time to look through the structure, in particular the quality control reports in results/multiqc/ and the count table in results/tables/.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How the MRSA workflow looks.
  • +
  • How to run the MRSA workflow.
  • +
  • Which output files the MRSA workflow produces.
  • +
+
+
+
+
+

5 Parameters

+

In a typical bioinformatics project, considerable effort is spent on tweaking parameters for the various programs involved. It would be inconvenient if you had to edit the shell commands themselves every time you wanted to run with a new setting. Luckily, there is a better option for this: the params keyword.

+
rule some_rule:
+    output:
+        "..."
+    input:
+        "..."
+    params:
+        cutoff=2.5
+    shell:
+        """
+        some_program --cutoff {params.cutoff} {input} {output}
+        """
+

Most of the programs are run with default settings in the MRSA workflow and don’t use the params: directive. However, the get_SRA_by_accession rule is an exception. Here the remote address for each of the files to download is passed to the shell directive via:

+
def get_sample_url(wildcards):
+    samples = {
+        "SRR935090": "https://figshare.scilifelab.se/ndownloader/files/39539767",
+        "SRR935091": "https://figshare.scilifelab.se/ndownloader/files/39539770",
+        "SRR935092": "https://figshare.scilifelab.se/ndownloader/files/39539773"
+    }
+    return samples[wildcards.sample_id]
+
+rule get_SRA_by_accession:
+    """
+    Retrieve a single-read FASTQ file
+    """
+    output:
+        "data/{sample_id}.fastq.gz"
+    params:
+        url = get_sample_url
+    shell:
+        """
+        wget -O - {params.url} | seqtk sample - 25000 | gzip -c > {output[0]}
+        """
+

You may recognize this from page 2 of this tutorial where we used input functions to generate strings and lists of strings for the input: section of a rule. Using a function to return values based on the wildcards also works for params:. Here sample_id is a wildcard which in this specific workflow can be SRR935090, SRR935091 or SRR935092. The wildcards object is passed to the function get_sample_url and, depending on what output the rule is supposed to generate, wildcards.sample_id will take the value of one of the three sample ids. The samples variable defined in the function is a Python dictionary that has the URLs for each sample_id hard-coded. This dictionary is used to convert the value of the sample_id wildcard to a URL, which is returned by the function. Finally, in the shell: directive we access the url parameter with {params.url}. (We could have written three separate rules to download the samples, but it’s easy to see how that can become impractical.)

+

Let’s add another parameter to the get_SRA_by_accession rule. As you can see in the shell command, the FASTQ file downloaded by wget gets piped directly (the -O - part means send contents to STDOUT) to the seqtk sample command, which reads from STDIN and outputs 25000 randomly sampled reads (out of the 100,000 contained in the example FASTQ file). Change the rule to use a parameter called max_reads instead and set its value to 20000. If you need help, click to show the solution below.

+
+ +
+
+
rule get_SRA_by_accession:
+    """
+    Retrieve a single-read FASTQ file
+    """
+    output:
+        "data/{sample_id}.fastq.gz"
+    params:
+        url = get_sample_url,
+        max_reads = 20000
+    shell:
+        """
+        wget -O - {params.url} | seqtk sample - {params.max_reads} | gzip -c > {output[0]}
+        """
+
+
+
+

Now run through the workflow. Because there have been changes to the get_SRA_by_accession rule, this will trigger a re-run of the rule for all three accessions. In addition, all downstream rules that depend on output from get_SRA_by_accession are re-run.
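For example, you could rerun the whole workflow with:

snakemake -s snakefile_mrsa.smk -c 1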

+

As you can see, the parameter values we set in the params section don’t have to be static; they can be any Python expression. In particular, Snakemake provides a global dictionary of configuration parameters called config. Let’s modify get_SRA_by_accession to look something like this in order to make use of this dictionary:

+
rule get_SRA_by_accession:
+    """
+    Retrieve a single-read FASTQ file
+    """
+    output:
+        "data/{sample_id}.fastq.gz"
+    params:
+        url = get_sample_url,
+        max_reads = config["max_reads"]
+    shell:
+        """
+        wget -O - {params.url} | seqtk sample - {params.max_reads} | gzip -c > {output[0]}
+        """
+

Note that Snakemake now expects there to be a key named max_reads in the config dictionary. If we don’t populate the dictionary somehow it will be empty, so if you were to run the workflow now it would trigger a KeyError (try running snakemake -s snakefile_mrsa.smk -n to see for yourself). In order to populate the config dictionary with data for the workflow we could use the snakemake --config KEY=VALUE syntax directly from the command line (e.g. snakemake --config max_reads=20000 -s snakefile_mrsa.smk). However, from a reproducibility perspective, it’s not optimal to set parameters from the command line, since it’s difficult to keep track of which parameter values were used.

+

A much better alternative is to use the --configfile FILE option to supply a configuration file to Snakemake. In this file we can collect all the project-specific settings, sample ids and so on. This also enables us to write the Snakefile in a more general manner so that it can be better reused between projects. Like several other files used in these tutorials, this file should be in YAML format. Create the file below and save it as config.yml.

+
max_reads: 25000
+

If we now run Snakemake with --configfile config.yml, it will parse this file to form the config dictionary. If you want to overwrite a parameter value, e.g. for testing, you can still use the --config KEY=VALUE flag, as in --config max_reads=1000.
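For example, a dry-run using the config file could look like this:

snakemake -s snakefile_mrsa.smk --configfile config.yml -n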

+
+
+
+ +
+
+Tip +
+
+
+

Rather than supplying the config file from the command line you could also add the line configfile: "config.yml" to the top of your Snakefile. Keep in mind that with such a setup Snakemake will complain if the file config.yml is not present.

+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to set parameter values with the params directive.
  • +
  • How to run Snakemake with the config variable and with a configuration file.
  • +
+
+
+
+
+

6 Logs

+

As you probably noticed it was difficult to follow how the workflow progressed since some rules printed a lot of output to the terminal. In some cases this also contained important information, such as statistics on the sequence alignments or genome indexing. This could be valuable for example if you later in the project get weird results and want to debug. It’s also important from a reproducibility perspective that the “paper trail” describing how the outputs were generated is saved. Luckily, Snakemake has a feature that can help with this. Just as we define input and output in a rule we can also define log.

+
rule some_rule:
+    output:
+        "..."
+    input:
+        "..."
+    log:
+        "..."
+    shell:
+        """
+        echo 'Converting {input} to {output}' > {log}
+        """
+

A log file is no different from any other output file, but it’s dealt with a little differently by Snakemake. For example, it’s shown in the file summary when using -D and, unlike other output files, it’s not deleted if a job fails, which of course is necessary for debugging purposes. It’s also a good way to clarify the purpose of the file. We probably don’t need to save logs for all the rules, only the ones with interesting output.

+
    +
  • get_genome_fasta and get_genome_gff3 would be good to log since they are dependent on downloading files from an external server.
  • +
  • multiqc aggregates quality control data for all the samples into one html report, and the log contains information about which samples were aggregated.
  • +
  • index_genome outputs some statistics about the genome indexing.
  • +
  • align_to_genome outputs important statistics about the alignments. This is probably the most important log to save.
  • +
+

Now add a log file to some or all of the rules above. A good place to save them to would be results/logs/rule_name/. To prevent multiple jobs from writing to the same files, Snakemake requires that all output and log files contain the same wildcards, so be sure to include any wildcards used in the rule in the log name as well, e.g. {some_wildcard}.log.

+

You also have to specify in the shell section of each rule what you want the log to contain. Some of the programs we use send their log information to standard out, some to standard error and some let us specify a log file via a flag.

+

For example, in the align_to_genome rule, it could look like this (Bowtie2 writes log info to standard error):

+
rule align_to_genome:
+    """
+    Align a fastq file to a genome index using Bowtie 2.
+    """
+    output:
+        "results/bam/{sample_id,\w+}.bam"
+    input:
+        "data/{sample_id}.fastq.gz",
+        "results/bowtie2/NCTC8325.1.bt2",
+        "results/bowtie2/NCTC8325.2.bt2",
+        "results/bowtie2/NCTC8325.3.bt2",
+        "results/bowtie2/NCTC8325.4.bt2",
+        "results/bowtie2/NCTC8325.rev.1.bt2",
+        "results/bowtie2/NCTC8325.rev.2.bt2"
+    log:
+        "results/logs/align_to_genome/{sample_id}.log"
+    shell:
+        """
+        bowtie2 -x results/bowtie2/NCTC8325 -U {input[0]} > {output} 2>{log}
+        """
+

To save some time you can use the info below.

+
# wget has a -o flag for specifying the log file
+wget remote_file -O output_file -o {log}
+
+# MultiQC and featureCounts write to standard error so we redirect with "2>"
+multiqc -n output_file input_files 2> {log}
+featureCounts -t gene -g gene_id -a gff_file -o output_file input_files 2>{log}
+
+# Bowtie2-build redirects to standard out so we use ">"
+bowtie2-build input_file index_dir > {log}
+

Now rerun the whole workflow. Do the logs contain what they should? Note how much easier it is to follow the progression of the workflow when the rules write to logs instead of to the terminal.

+
+
+
+ +
+
+Tip +
+
+
+

If you have a rule with a shell directive in which several commands are run and you want to save stdout and stderr for all commands into the same log file, you can add exec &> {log} as the first line of the shell directive (see the sketch below this box).

+
+
+
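A minimal sketch of that pattern, with hypothetical command names:

rule some_rule:
    output:
        "..."
    input:
        "..."
    log:
        "..."
    shell:
        """
        # Redirect stdout and stderr of all following commands to the log file
        exec &> {log}
        first_command {input}
        second_command > {output}
        """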

If you run with -D (or -S for a simpler version) you will see that the summary table now also contains the log file for each of the files in the workflow.

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to redirect output to log files with the log directive.
  • +
+
+
+
+
+

7 Temporary files

+

It’s not uncommon that workflows contain temporary files that should be kept for some time and then deleted once they are no longer needed. A typical case could be that some operation generates a file, which is then compressed to save space or indexed to make searching faster. There is then no need to save the original output file. Take a look at the job graph for our workflow again. The output from align_to_genome is a BAM file, which contains information about all the reads for a sample and where they map in the genome. For downstream processing we need this file to be sorted by genome coordinates. This is what the rule sort_bam is for. We therefore end up with both results/bam/{sample_id}.bam and results/bam/{sample_id}.sorted.bam.

+

In Snakemake we can mark an output file as temporary like this:

+
output: temp("...")
+

The file will then be deleted as soon as all jobs where it’s an input have finished. Now do this for the output of align_to_genome. We have to rerun the rule for it to trigger, so use -R align_to_genome. It should look something like this:

+
.
+.
+rule sort_bam:
+    input: results/bam/SRR935090.bam
+    output: results/bam/SRR935090.sorted.bam
+    jobid: 2
+    wildcards: sample_id=SRR935090
+
+Removing temporary output file results/bam/SRR935090.bam.
+Finished job 2.
+.
+.
+
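For reference, marking the BAM file as temporary is just a matter of wrapping the output path of align_to_genome in temp(), something like:

output:
    temp("results/bam/{sample_id,\w+}.bam")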
+
+
+ +
+
+Tip +
+
+
+

Sometimes you may want to trigger removal of temporary files without actually rerunning the jobs. You can then use the --delete-temp-output flag. In some cases you may instead want to run only parts of a workflow and therefore want to prevent files marked as temporary from being deleted (because the files are needed for other parts of the workflow). In such cases you can use the --notemp flag.

+
+
+

Snakemake has a number of options for marking files:

+
    +
  • temp("..."): The output file should be deleted once it’s no longer needed by any rules.
  • +
  • protected("..."): The output file should be write-protected. Typically used to protect files that require a huge amount of computational resources from being accidentally deleted.
  • +
  • ancient("..."): The timestamp of the input file is ignored and it’s always assumed to be older than any of the output files.
  • +
  • touch("..."): The output file should be “touched”, i.e. created or updated, when the rule has finished. Typically used as “flag files” to enforce some rule execution order without real file dependencies.
  • +
  • directory("..."): The output is a directory rather than a file.
  • +
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to mark an output file as temporary for automatic removal.
  • +
+
+
+
+
+

8 Targets

+

We’ve mentioned that Snakemake rules take either strings or a list of strings as input, and that we can use any Python expression in Snakemake workflows. Here we’ll show how these features help us condense the code of rules.

+

Consider the rule align_to_genome below.

+
rule align_to_genome:
+    """
+    Align a fastq file to a genome index using Bowtie 2.
+    """
+    output:
+        "results/bam/{sample_id}.bam"
+    input:
+        "data/{sample_id}.fastq.gz",
+        "results/bowtie2/NCTC8325.1.bt2",
+        "results/bowtie2/NCTC8325.2.bt2",
+        "results/bowtie2/NCTC8325.3.bt2",
+        "results/bowtie2/NCTC8325.4.bt2",
+        "results/bowtie2/NCTC8325.rev.1.bt2",
+        "results/bowtie2/NCTC8325.rev.2.bt2"
+    shell:
+        """
+        bowtie2 -x results/bowtie2/NCTC8325 -U {input[0]} > {output}
+        """
+

Here we have seven inputs: the FASTQ file with the reads and six files with similar file names from the Bowtie2 genome indexing. Instead of writing out all the filenames, we can tidy this up by using a Python expression to generate the list of index files. If you’re familiar with Python you could do this with a list comprehension like this:

+
input:
+    "data/{sample_id}.fastq.gz",
+    [f"results/bowtie2/NCTC8325.{substr}.bt2" for
+        substr in ["1", "2", "3", "4", "rev.1", "rev.2"]]
+

This will take the elements of the list of substrings one by one and insert each element in the place of {substr}. Since this type of aggregation is quite common, Snakemake also has a more compact way of achieving the same thing.

+
input:
+    "data/{sample_id}.fastq.gz",
+    expand("results/bowtie2/NCTC8325.{substr}.bt2",
+        substr = ["1", "2", "3", "4", "rev.1", "rev.2"])
+
+
+
+ +
+
+Caution +
+
+
+

When using expand() like this, substr is not a wildcard because it is resolved to the values explicitly given inside the expand expression.

+
+
+

Now change the rules index_genome and align_to_genome to use the expand() expression.

+

In the workflow we decide which samples to run by including the SRR ids in the names of the inputs to the rules multiqc and generate_count_table:

+
rule generate_count_table:
+    output:
+        "results/tables/counts.tsv"
+    input:
+        bams = ["results/bam/SRR935090.sorted.bam",
+                "results/bam/SRR935091.sorted.bam",
+                "results/bam/SRR935092.sorted.bam"],
+...
+rule multiqc:
+    output:
+        html = "results/multiqc/multiqc.html",
+        stats = "results/multiqc/multiqc_general_stats.txt"
+    input:
+        "results/fastqc/SRR935090_fastqc.zip",
+        "results/fastqc/SRR935091_fastqc.zip",
+        "results/fastqc/SRR935092_fastqc.zip"
+

The output files from these two rules, results/multiqc/multiqc.html and results/tables/counts.tsv, are in turn specified as input to the all rule at the top of the file. Because the first rule is targeted by default when we run Snakemake on the command line (as we mentioned in snakemake-4-the-mrsa-workflow), this is what triggers the rules to run on each of the three samples.

+

However, this is a potential source of errors since it’s easy to change the ids in one place and forget to change them in the other. Because we can use Python code “everywhere”, let’s instead define a list of sample ids and put it at the very top of the Snakefile, just before the rule all:

+
SAMPLES = ["SRR935090", "SRR935091", "SRR935092"]
+

Now use expand() in multiqc and generate_count_table to use SAMPLES for the sample ids. For the multiqc rule it could look like this:

+
input:
+    expand("results/fastqc/{sample_id}_fastqc.zip", sample_id = SAMPLES)
+

See if you can update the generate_count_table rule in the same manner!
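If you want to check your solution, the bams input of generate_count_table could look something like this:

bams = expand("results/bam/{sample_id}.sorted.bam", sample_id = SAMPLES)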

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use the expand() expression to create a list with file names, inserting all provided wildcard values.
  • +
+
+
+
+
+

9 Shadow rules

+

Take a look at the index_genome rule below:

+
rule index_genome:
+    """
+    Index a genome using Bowtie 2.
+    """
+    output:
+        index = expand("results/bowtie2/NCTC8325.{substr}.bt2",
+           substr = ["1", "2", "3", "4", "rev.1", "rev.2"])
+    input:
+        "data/NCTC8325.fa.gz"
+    log:
+        "results/logs/index_genome/NCTC8325.log"
+    shell:
+        """
+        # Bowtie2 cannot use .gz, so unzip to a temporary file first
+        gunzip -c {input} > tempfile
+        bowtie2-build tempfile results/bowtie2/NCTC8325 >{log}
+
+        # Remove the temporary file
+        rm tempfile
+        """
+

There is a temporary file here called tempfile which is the uncompressed version of the input, since Bowtie2 cannot use compressed files. There are a number of drawbacks with having files that aren’t explicitly part of the workflow as input/output files to rules:

+
    +
  • Snakemake cannot clean up these files if the job fails, as it would do for normal output files.
  • +
  • If several jobs are run in parallel there is a risk that they write to tempfile at the same time. This can lead to very scary results.
  • +
  • Sometimes we don’t know the names of all the files that a program can generate. It is, for example, not unusual that programs leave some kind of error log behind if something goes wrong.
  • +
+

All of these issues can be dealt with by using the shadow option for a rule. With the shadow option, each execution of the rule is run in an isolated temporary directory (located in .snakemake/shadow/ by default). There are a few options for shadow (for the full list of these options see the Snakemake docs). The simplest is shadow: "minimal", which means that the rule is executed in an empty directory that the input files to the rule have been symlinked into. For the rule below, that means that the only file available would be input.txt. The shell commands would generate the files some_other_junk_file and output.txt. Lastly, Snakemake will move the output file (output.txt) to its “real” location and remove the whole shadow directory. We therefore never have to think about manually removing some_other_junk_file.

+
rule some_rule:
+    output:
+        "output.txt"
+    input:
+        "input.txt"
+    shadow: "minimal"
+    shell:
+        """
+        touch some_other_junk_file
+        cp {input} {output}
+        """
+

Try this out for the rules where we have to “manually” deal with files that aren’t tracked by Snakemake (multiqc, index_genome). Also remove the shell commands that remove temporary files from those rules, as they are no longer needed. Now rerun the workflow and validate that the temporary files don’t show up in your working directory.

+
+
+
+ +
+
+Tip +
+
+
+

Some people use the shadow option for almost every rule and some never use it at all. One thing to keep in mind is that it leads to some extra file operations when the outputs are moved to their final location. This is no issue when the shadow directory is on the same disk as the output directory, but if you’re running on a distributed file system and generate very many or very large files it might be worth considering other options (see e.g. the --shadow-prefix flag).

+
+
+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to use the shadow option to handle files that are not tracked by Snakemake.
  • +
+
+
+
+
+

10 Generalising workflows

+

It’s a good idea to separate project-specific parameters from the actual implementation of the workflow. This allows anyone using the workflow to modify its behaviour without changing the underlying code, making the workflow more general.

+

In order to generalize our RNA-seq analysis workflow we should move all project-specific information to config.yml. This means that we want the config file to:

+
    +
  • Specify which samples to run.
  • +
  • Specify which genome to align to and where to download its sequence and annotation files.
  • +
  • (Contain any other parameters we might need to make it into a general workflow, e.g. to support both paired-end and single-read sequencing)
  • +
+
+
+
+ +
+
+Note +
+
+
+

Putting all configuration in config.yml will break the generate_rulegraph rule. You can fix it either by replacing --config max_reads=0 with --configfile=config.yml in the shell command of that rule in the Snakefile, or by adding configfile: "config.yml" to the top of the Snakefile (as mentioned in a previous tip).

+
+
+

The first point is straightforward; rather than using SAMPLES = ["..."] in the Snakefile we define it as a parameter in config.yml. You can either add it as a list similar to the way it was expressed before by adding:

+
SAMPLES: ["SRR935090", "SRR935091", "SRR935092"]
+

To config.yml, or you can use this YAML notation (whether you choose SAMPLES or sample_ids as the name of the entry doesn’t matter, you will just have to reference the same name in the config dictionary inside the workflow):

+
sample_ids:
+  - SRR935090
+  - SRR935091
+  - SRR935092
+

Change the workflow to reference config["sample_ids"] (if using the latter example) instead of SAMPLES, as in:

+
expand("results/fastqc/{sample_id}_fastqc.zip",
+            sample_id = config["sample_ids"])
+

Remove the line with SAMPLES = ["SRR935090", "SRR935091", "SRR935092"] that we added to the top of snakefile_mrsa.smk in Snakemake 8: Targets.

+

Do a dry-run afterwards to make sure that everything works as expected.

+

You may remember from the snakemake-5-parameters part of this tutorial that we’re using a function to return the URL of the FASTQ files to download for each sample:

+
def get_sample_url(wildcards):
+    samples = {
+        "SRR935090": "https://figshare.scilifelab.se/ndownloader/files/39539767",
+        "SRR935091": "https://figshare.scilifelab.se/ndownloader/files/39539770",
+        "SRR935092": "https://figshare.scilifelab.se/ndownloader/files/39539773"
+    }
+    return samples[wildcards.sample_id]
+

Here the URL of each sample_id is hard-coded in the samples dictionary inside the function. To generalize this function we can move the definition to the config file, placing it for example under an entry that we call sample_urls like this:

+
sample_urls:
+  SRR935090: "https://figshare.scilifelab.se/ndownloader/files/39539767"
+  SRR935091: "https://figshare.scilifelab.se/ndownloader/files/39539770"
+  SRR935092: "https://figshare.scilifelab.se/ndownloader/files/39539773"
+

This is what’s called ‘nested’ key/value pairs, meaning that each sample_id -> URL pair becomes nested under the config key sample_urls. So in order to access the URL of e.g. SRR935090 we would use config["sample_urls"]["SRR935090"]. This means that you will have to update the get_sample_url function to:

+
def get_sample_url(wildcards):
+    return config["sample_urls"][wildcards.sample_id]
+

Now the function uses the global config dictionary to return URLs for each sample_id. Again, do a dry-run to see that the new implementation works.

+
+
+
+ +
+
+Tip +
+
+
+

If you were to scale up this workflow with more samples it could become impractical to have to define the URLs by hand in the config file. A tip then is to have a separate file where samples are listed in one column and the URLs (or file paths) in another column. With a few lines of Python code you could then read that list at the start of the workflow and add each sample to the config dictionary (see the sketch below this box).

+
+
+
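A minimal sketch of that idea, assuming a hypothetical tab-separated file samples.tsv with sample ids in the first column and URLs in the second:

# At the top of the Snakefile: read the (hypothetical) samples.tsv file
# and add each sample_id -> URL pair to the config dictionary
config["sample_urls"] = {}
with open("samples.tsv") as fh:
    for line in fh:
        if not line.strip():
            continue
        sample_id, url = line.strip().split("\t")
        config["sample_urls"][sample_id] = url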

Now let’s take a look at the genome reference used in the workflow. In the get_genome_fasta and get_genome_gff3 rules we have hard-coded FTP paths to the FASTA file and the GFF3 annotation file for the genome NCTC8325. We can generalize this in a similar fashion to what we did with the get_SRA_by_accession rule. Let’s add a nested entry called genomes to the config file that will hold the genome id and FTP paths to the FASTA and GFF3 files:

+
genomes:
+  NCTC8325:
+    fasta: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz
+    gff3: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.37.gff3.gz
+  ST398:
+    fasta: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection//staphylococcus_aureus_subsp_aureus_st398/dna/Staphylococcus_aureus_subsp_aureus_st398.ASM958v1.dna.toplevel.fa.gz
+    gff3: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_st398//Staphylococcus_aureus_subsp_aureus_st398.ASM958v1.37.gff3.gz
+

As you can see this is very similar to what we did with sample_urls, except that we have one more nested level. Now, to access the FTP path to the FASTA file for genome id NCTC8325 we can use config["genomes"]["NCTC8325"]["fasta"].

+

Let’s now look at how to do the mapping from genome id to FASTA path in the rule get_genome_fasta. This is how the rule currently looks (if you have added the log section as previously described).

+
rule get_genome_fasta:
+    """
+    Retrieve the sequence in fasta format for a genome.
+    """
+    output:
+        "data/raw_external/NCTC8325.fa.gz"
+    log:
+        "results/logs/get_genome_fasta/NCTC8325.log"
+    shell:
+        """
+        wget -o {log} ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz -O {output}
+        """
+

We don’t want the hard-coded genome id NCTC8325, so replace that with a wildcard, say {genome_id} (remember to add the wildcard to the log: directive as well). We now need to supply the remote paths to the FASTA file for a given genome id. Because we’ve added this information to the config file we just need to pass it to the rule in some way, and just like in the get_SRA_by_accession rule we’ll use a function to do the job:

+
def get_fasta_path(wildcards):
+    return config["genomes"][wildcards.genome_id]["fasta"]
+
+rule get_genome_fasta:
+    """
+    Retrieve the sequence in fasta format for a genome.
+    """
+    output:
+        "data/ref/{genome_id}.fa.gz"
+    log:
+        "results/logs/get_genome_fasta/{genome_id}.log"
+    params:
+        fasta_path = get_fasta_path
+    shell:
+        """
+        wget -o {log} {params.fasta_path} -O {output}
+        """
+

Now change the get_genome_gff3 rule in a similar manner. Click to see the solution below if you’re having trouble.

+
+ +
+
+
def get_gff_path(wildcards):
+    return config["genomes"][wildcards.genome_id]["gff3"]
+
+rule get_genome_gff3:
+    """
+    Retrieve annotation in gff3 format for a genome.
+    """
+    output:
+        "data/ref/{genome_id}.gff3.gz"
+    log:
+        "results/logs/get_genome_gff3/{genome_id}.log"
+    params:
+        gff3_path = get_gff_path
+    shell:
+        """
+        wget -o {log} {params.gff3_path} -O {output}
+        """
+
+
+
+

Also change in index_genome to use a wildcard rather than a hard-coded genome id. Here you will run into a complication if you have followed the previous instructions and use the expand() expression. We want the list to expand to ["results/bowtie2/{genome_id}.1.bt2", "results/bowtie2/{genome_id}.2.bt2", ...], i.e. only expanding the wildcard referring to the Bowtie2 index. To keep the genome_id wildcard from being expanded we have to “mask” it with double curly brackets: {genome_id}. In addition, we need to replace the hard-coded results/bowtie2/NCTC8325 in the shell directive of the rule with the genome id wildcard. Inside the shell directive the wildcard object is accessed with this syntax: {wildcards.genome_id}, so the Bowtie2-build command should be:

+
bowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}
+

Note that this will only work if the {genome_id} wildcard can be resolved to something defined in the config (currently NCTC8325 or ST398). If you try to generate a FASTA file for a genome id not defined in the config, Snakemake will complain, even at the dry-run stage.

+
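
If you want to catch such typos early, one option is to constrain the genome_id wildcard to the genome ids actually defined in the config. This is not required for the tutorial, but a minimal sketch (placed in the Snakefile after the config has been read) could look like this:

+
wildcard_constraints:
+    genome_id = "|".join(config["genomes"].keys())
+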

Finally, remember that any wildcards need to be present in both the output: and log: directives? This means we have to update the log: directive in index_genome as well. The final rule should look like this:

+
rule index_genome:
+    """
+    Index a genome using Bowtie 2.
+    """
+    output:
+        expand("results/bowtie2/{{genome_id}}.{substr}.bt2",
+            substr = ["1", "2", "3", "4", "rev.1", "rev.2"])
+    input:
+        "data/ref/{genome_id}.fa.gz"
+    log:
+        "results/logs/index_genome/{genome_id}.log"
+    shadow: "minimal"
+    shell:
+        """
+        # Bowtie2 cannot use .gz, so unzip to a temporary file first
+        gunzip -c {input} > tempfile
+        bowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}
+        """
+

Good job! The rules get_genome_fasta, get_genome_gff3 and index_genome can now download and index any genome as long as we provide valid links in the config file.
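
You can convince yourself of this with a dry-run that targets an output file for the genome that was previously not used anywhere, for example (assuming the output paths shown above):

+
snakemake -n -c 1 data/ref/ST398.fa.gz
+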

+

However, we need to define somewhere which genome id we actually want to use when running the workflow. This needs to be done both in align_to_genome and generate_count_table. Do this by introducing a parameter in config.yml called "genome_id" (you can set it to either NCTC8325 or ST398), e.g.:

+
genome_id: "NCTC8325"
+

Now we can resolve the genome_id wildcard from the config. See below for an example for align_to_genome. Here the substr wildcard gets expanded from a list while genome_id gets expanded from the config file.

+
input:
+    "data/{sample_id}.fastq.gz",
+    index = expand("results/bowtie2/{genome_id}.{substr}.bt2",
+           genome_id = config["genome_id"],
+           substr = ["1", "2", "3", "4", "rev.1", "rev.2"])
+

Also change the hard-coded genome id in the generate_count_table input in a similar manner:

+
rule generate_count_table:
+    """
+    Generate a count table using featureCounts.
+    """
+    output:
+        "results/tables/counts.tsv",
+        "results/tables/counts.tsv.summary"
+    input:
+        bams=expand("results/bam/{sample_id}.sorted.bam",
+                    sample_id = config["sample_ids"]),
+        annotation=expand("data/ref/{genome_id}.gff3.gz",
+                    genome_id = config["genome_id"])
+    log:
+        "results/logs/generate_count_table.log"
+    shell:
+        """
+        featureCounts -t gene -g gene_id -a {input.annotation} -o {output[0]} {input.bams} 2>{log}
+        """
+

In general, we want the rules as far downstream as possible in the workflow to be the ones that determine what the wildcards should resolve to. In our case this is align_to_genome and generate_count_table. You can think of it like the rule that really “needs” the file asks for it, and then it’s up to Snakemake to determine how it can use all the available rules to generate it. Here the align_to_genome rule says “I need this genome index to align my sample to” and then it’s up to Snakemake to determine how to download and build the index.

+

One last thing is to change the hard-coded NCTC8325 in the shell: directive of align_to_genome. Bowtie2 expects the index name supplied with the -x flag to be without the “.*.bt2” suffix, so we can’t use -x {input.index}. Instead we’ll insert the genome_id directly from the config like this:

+
shell:
+    """
+    bowtie2 -x results/bowtie2/{config[genome_id]} -U {input[0]} > {output} 2>{log}
+    """
+
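
If you prefer to keep config lookups out of the shell command, an alternative (functionally equivalent) sketch is to expose the index prefix via the params: directive instead:

+
params:
+    index_prefix = "results/bowtie2/" + config["genome_id"]
+shell:
+    """
+    bowtie2 -x {params.index_prefix} -U {input[0]} > {output} 2>{log}
+    """
+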
+
+
+ +
+
+Summary +
+
+
+

Well done! You now have a complete Snakemake workflow with a number of excellent features:

+
    +
  • A general RNA-seq pipeline which can easily be reused between projects, thanks to clear separation between code and settings.
  • Great traceability due to logs and summary tables.
  • A clearly defined software environment for the workflow using Conda.
  • A neat workflow free from temporary files, thanks to temp() and shadow.
  • A logical directory structure which makes it easy to separate data and results of different software packages.
  • A project set up in a way that makes it very easy to distribute and reproduce, either via Git, Snakemake’s --archive option or a Docker image.
+
+
+
+
+

11 Reading samples from a file instead of hard-coding them

+

So far we’ve specified the samples to use in the workflow either as a hard-coded list in the Snakefile, or as a list in the configuration file. This is of course impractical for large real-world examples. Here we’ll just quickly show how you could instead supply the samples via a tab-separated file. For example, you could create a file called samples.tsv with the following content:

+
SRR935090   https://figshare.scilifelab.se/ndownloader/files/39539767
+SRR935091   https://figshare.scilifelab.se/ndownloader/files/39539770
+SRR935092   https://figshare.scilifelab.se/ndownloader/files/39539773
+

The first column holds the sample id and the second column the URL to the FASTQ file. In order to read this into the workflow we need a few lines of Python code. Since you can mix Python code with rule definitions in Snakemake, we’ll just add the following lines to the top of the Snakefile:

+
# define an empty 'samples' dictionary
+samples = {}
+# read the sample list file and populate the dictionary
+with open("samples.tsv", "r") as fhin:
+    for line in fhin:
+        # strip the newline character from the end of the line
+        # then split by tab character to get the sample id and url
+        sample_id, url = line.strip().split("\t")
+        # store the url in the dictionary with the sample id as key
+        samples[sample_id] = url
+

Now we can use the samples dictionary in the workflow. For example, to get the URL for SRR935090 we can use samples["SRR935090"].

+

For example, the get_sample_url function can now be written as:

+
def get_sample_url(wildcards):
+    return samples[wildcards.sample_id]
+

We can also use the samples dictionary in expand(), for example in the multiqc rule:

+
rule multiqc:
+    """
+    Aggregate all FastQC reports into a MultiQC report.
+    """
+    output:
+        html="results/multiqc/multiqc.html",
+        stats="results/multiqc/multiqc_general_stats.txt"
+    input:
+        expand("results/fastqc/{sample_id}_fastqc.zip", sample_id = samples.keys())
+    log:
+        "results/logs/multiqc/multiqc.log"
+    shadow: "minimal"
+    shell:
+        """
+        # Run multiQC and keep the html report
+        multiqc -n multiqc.html {input} 2> {log}
+        mv multiqc.html {output.html}
+        mv multiqc_data/multiqc_general_stats.txt {output.stats}
+        """
+

Now this depends on there being a samples.tsv file in the working directory. To make this a configurable parameter we can add it to the config file:

+
sample_list: "samples.tsv"
+

and update the code for populating the samples dictionary:

+
# define an empty 'samples' dictionary
+samples = {}
+# read the sample list file and populate the dictionary
+with open(config["sample_list"], "r") as fhin:
+    for line in fhin:
+        # strip the newline character from the end of the line
+        # then split by tab character to get the sample id and url
+        sample_id, url = line.strip().split("\t")
+        # store the url in the dictionary with the sample id as key
+        samples[sample_id] = url
+

This way, anyone can take our Snakefile and just update the path to their own sample_list using the config file.
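
If your sample list grows to contain more columns (e.g. sample metadata, or separate URLs for forward and reverse reads) it may be more convenient to read it with pandas. The sketch below assumes that pandas is available in your Snakemake environment and that the file has a header line with the columns sample_id and url:

+
import pandas as pd
+
+# read the tab-separated sample sheet, indexing rows by the sample_id column
+df = pd.read_csv(config["sample_list"], sep="\t", index_col="sample_id")
+# turn the 'url' column into a dictionary, equivalent to the 'samples' dict above
+samples = df["url"].to_dict()
+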

+
+
+
+ +
+
+Quick recap +
+
+
+

In this section we’ve learned:

+
    +
  • How to generalize a Snakemake workflow.
+
+
+
+
+

12 Extra material

+

If you want to read more about Snakemake in general you can find several resources here:

+ +
+

12.1 Using containers in Snakemake

+

Snakemake also supports defining an Apptainer or Docker container for each rule (you will have time to work on the Containers tutorial later during the course). Analogous to using a rule-specific Conda environment, specify container: "docker://some-account/rule-specific-image" in the rule definition. Instead of a link to a container image, it is also possible to provide the path to a *.sif file (= a Singularity image file). When executing Snakemake, add the --software-deployment-method apptainer (or the shorthand --sdm apptainer) flag to the command line. For the given rule, Snakemake will then create an Apptainer container on the fly from the image or file provided in the rule definition, and the rule will be run inside this container.

+

You can find pre-made Apptainer or Docker images for many tools on https://biocontainers.pro/ (bioinformatics-specific) or on https://hub.docker.com/.

+

Here is an example of a rule and its execution:

+
rule align_to_genome:
+    output:
+        temp("results/bam/{sample_id,\w+}.bam")
+    input:
+        fastq = "data/{sample_id}.fastq.gz",
+        index = expand("results/bowtie2/{genome_id}.{substr}.bt2",
+            genome_id=config["genome_id"],
+            substr=["1", "2", "3", "4", "rev.1", "rev.2"])
+    log:
+        expand("results/logs/align_to_genome/{{sample_id}}_{genome_id}.log",
+            genome_id = config["genome_id"])
+    container: "docker://quay.io/biocontainers/bowtie2:2.5.0--py310h8d7afc0_0"
+    shell:
+        """
+        bowtie2 -x results/bowtie2/{config[genome_id]} -U {input.fastq} > {output} 2>{log}
+        """
+

Start your Snakemake workflow with the following command:

+
snakemake --software-deployment-method apptainer
+

Feel free to modify the MRSA workflow according to this example. Since Apptainer is container software developed for HPC clusters (the Mac version, for example, is still in beta), running your updated Snakemake workflow with Apptainer locally on your own computer might not work. In the next section we explain how you can run Snakemake workflows on UPPMAX, where Apptainer is pre-installed.

+
+
+

12.2 Running Snakemake workflows on HPC clusters

+

If you need to run a Snakemake workflow on a high-performance computing (HPC) cluster you have a wide range of options at your disposal. Via the plugin catalog you can find plugins that will add support for various HPC schedulers to Snakemake.

+

Here we will focus on how to run Snakemake workflows on clusters with SLURM, a workload manager commonly used on HPC clusters in Sweden such as Rackham, Tetralith and Dardel.

+
+
+
+ +
+
+Tip +
+
+
+

When running on remote clusters we highly recommend using a session manager like tmux or screen, so that your workflow can keep running in a background session while you do other things on the cluster or even log out.

+
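
For example, with tmux (if it is available on the cluster) you could do something like this:

+
# start a named session
+tmux new -s snakemake_run
+# ...run your Snakemake workflow inside the session...
+# detach with Ctrl-b followed by d, log out if you like, then re-attach later with:
+tmux attach -t snakemake_run
+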
+
+
+

12.2.1 Option 1: Run the entire workflow as a single job

+

For short workflows with only a few rules that need the same compute resources in terms of CPU (cores) and memory, you can submit the entire workflow as a job directly to the SLURM scheduler, or start an interactive job (in your tmux or screen session) and run your Snakemake workflow there as you would on your local machine. Make sure to give your job enough time to finish running all rules of your Snakemake workflow.

+

If you choose this option, you don’t need to install anything from the plugin catalog. However, your workflow may not run as efficiently as it could if you were to add SLURM support to Snakemake.

+
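
As an illustration, a batch script for this option could look something like the sketch below. The account, partition and resource values are placeholders that you need to adapt to your cluster, and you may need to activate the Conda environment containing Snakemake inside the script:

+
#!/bin/bash
+#SBATCH -A naiss-2023-01-001   # your SLURM account/project id (placeholder)
+#SBATCH -p shared              # partition name (cluster-specific)
+#SBATCH -c 4                   # cores for the whole workflow
+#SBATCH -t 04:00:00            # enough time for all rules to finish
+
+# activate the environment with Snakemake here, if needed, then run the workflow
+snakemake -c 4
+

You would then submit the script with sbatch, e.g. sbatch run_workflow.sh if that is what you named it.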
+
+

12.2.2 Option 2: Use built-in SLURM support

+

For workflows with long run times and/or where the rules require different compute resources, Snakemake comes with built-in functionality for interacting with the SLURM workload manager, sending each rule as a separate job to the SLURM queue and tracking the status of each job.

+

In this case, you can start the workflow on the login node and let it run there until all jobs have finished. Given that workflows often consist of many rules, some of which may be highly resource demanding, this is the option we recommend when running most Snakemake workflows on HPC clusters.

+

To add SLURM support to Snakemake you first need to install the SLURM plugin from the plugin catalog. This can be done with conda:

+
conda install -c conda-forge snakemake-executor-plugin-slurm
+

Once installed, adding the --executor slurm flag to your Snakemake command line call will enable the plugin. You also need to specify how many jobs Snakemake can submit to the SLURM queue at the same time with the -j flag. For example, to allow up to 100 jobs to be put into the queue at any given time, you would run Snakemake with the following command:

+
snakemake --executor slurm -j 100 <other flags>
+
+
+
+

12.3 Specifying resources for SLURM

+

Depending on the cluster you are using, you will need to specify some resource requirements for the rules in your workflow, such as the number of CPUs, memory, runtime and account id. This can be done either:

+
    +
  1. directly on the command line with the --default-resources flag, which sets default resource settings for all rules,
  2. in the rule definition of your workflow using the resources: directive, or
  3. in a configuration profile, a folder with a config.yaml file that contains the resource settings.
+

You can also use a combination of these methods. For example, the SLURM account id (e.g. naiss-2023-01-001), which will most likely be the same for all rules, can be set with --default-resources:

+
snakemake --executor slurm -j 100 --default-resources slurm_account=naiss-2023-01-001
+

Rule-specific resources such as runtime, memory and number of CPUs can be set in the rule definition, for example:

+
rule testrule:
+    output:
+        "results/output.txt"
+    resources:
+        runtime = 60,
+        mem_mb = 16000,
+        cpus_per_task = 4
+    shell:
+        """
+        uname -a > {output}
+        """
+

This rule uses the standard resource runtime to set the maximum allowed time (in minutes) for the rule, sets the memory requirement with mem_mb and the number of requested CPUs with cpus_per_task. In this example the rule will have a time limit of 60 minutes and will request 16 GB of RAM and 4 CPUs.

+
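
Resource values can also be referenced from within the rule itself, and the threads: directive is handy for tools that accept a thread/CPU argument. A small, purely hypothetical sketch (some_tool is a stand-in for whatever program the rule runs):

+
rule some_rule:
+    output:
+        "results/output.txt"
+    threads: 4
+    resources:
+        runtime = 60,
+        mem_mb = 16000
+    shell:
+        """
+        some_tool --threads {threads} --mem-mb {resources.mem_mb} > {output}
+        """
+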

Some clusters also require you to specify the partition you want to run your job on. The partition name will differ between clusters, for example the Rackham cluster uses core and node partitions, while Dardel uses e.g. shared and main. See the documentation for the cluster you are using for more information.

+

The partition can be set with the slurm_partition resource, for example like so:

+
rule testrule:
+    output:
+        "results/output.txt"
+    resources:
+        runtime = 60,
+        mem_mb = 16000,
+        cpus_per_task = 4,
+        slurm_partition = "shared"
+    shell:
+        """
+        uname -a > {output}
+        """
+

To make it easy to adapt your workflow to different compute clusters it is recommended to define resource settings in a configuration profile. A configuration profile is a folder with a config.yaml file that contains values for Snakemake command line arguments, allowing you to modify the behavior of Snakemake without changing the workflow code. For example, you could create a dardel folder (e.g. in the root of your workflow) with a config.yaml file that contains the following:

+
executor: "slurm"
+jobs: 100
+default-resources:
+  slurm_account: "naiss-2023-01-001"
+  slurm_partition: "shared"
+  mem_mb: 16000
+  cpus_per_task: 4
+  runtime: 60
+

This yaml-formatted file contains Snakemake command line arguments that will be used when running the workflow. You can then run Snakemake with the --profile flag pointing to the folder containing the config.yaml file:

+
snakemake --profile dardel
+

This greatly simplifies running the workflow on different clusters, and makes the command line call much more succinct.

+

To set rule-specific resources in the configuration profile, you can add a set_resources: section to the config.yaml file:

+
executor: "slurm"
+jobs: 100
+default-resources:
+  slurm_account: "naiss-2023-01-001"
+  slurm_partition: "shared"
+  mem_mb: 16000
+  cpus_per_task: 4
+  runtime: 60
+set_resources:
+  index_genome:
+    runtime: 240
+    mem_mb: 32000
+    cpus_per_task: 8
+  align_to_genome:
+    runtime: 120
+    mem_mb: 24000
+    cpus_per_task: 6
+

In this example, the index_genome rule will have a runtime of 240 minutes and request 32 GB of RAM and 8 CPUs, while the align_to_genome rule will have a runtime of 120 minutes and request 24 GB of RAM and 6 CPUs. Both rules will use the slurm_account and slurm_partition settings from the default-resources section, unless these are overridden in the rule-specific settings.

+

You can still define resources in the rule definition, but the values in the configuration profile will take precedence.

+

Now, when you run your Snakemake workflow with:

+
snakemake --profile dardel
+

Snakemake will submit each job to the SLURM queue and inform you about both the local jobid and the SLURM jobid by writing something similar to this to your terminal:

+
Job 0 has been submitted with SLURM jobid 37099380 (log: .snakemake/slurm_logs/rule_name/37099380.log).
+

In this example the log output from the job will be in .snakemake/slurm_logs/rule_name/37099380.log.
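
While the workflow is running you can keep an eye on your jobs with the usual SLURM commands (assuming a standard SLURM setup), for example:

+
# list your queued and running jobs
+squeue -u $USER
+# show the status of a specific job, e.g. the one from the example above
+sacct -j 37099380
+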

+

You can read more details about running Snakemake on compute clusters in the Snakemake docs.

+ + +
+
+ +
+ +
+ + + + + + + \ No newline at end of file diff --git a/search.json b/search.json index 14c02dc2..bc78337d 100644 --- a/search.json +++ b/search.json @@ -41,6 +41,62 @@ "section": "5 Docker", "text": "5 Docker\nDocker is infamous for quickly taking up huge amounts of space, and some maintenance is necessary every now and then. Here is how to uninstall Docker completely. Let’s start by removing individual images and containers:\n# Remove unused images\ndocker image prune\n\n# Remove stopped containers\ndocker container prune\n\n# Remove unused volumes (not used here, but included for reference)\ndocker volume prune\n\n# Stop and remove ALL containers\ndocker container rm $(docker container ls -a -q)\n\n# Remove ALL images\ndocker image rm $(docker image ls -a -q)\nRemoving Docker itself works differently on the three operating systems, which is described below:\n\n5.1 MacOS\nClick the Docker icon in the menu bar (upper right part of the screen) and select “Preferences”. In the upper right corner, you should find a little bug icon. Click on that icon and select “Reset to factory defaults”. You may have to fill in your password. Then select “Uninstall”. Once it’s done uninstalling, drag the Docker app from Applications to Trash.\n\n\n5.2 Linux\nIf you’ve installed Docker with apt-get, uninstall it like this:\napt-get purge docker-ce\nImages, containers, and volumes are not automatically removed. To delete all of them:\nrm -rf /var/lib/docker\n\n\n5.3 Windows\nUninstall Docker for Windows (on Windows 10) or Docker Toolbox (on Windows 7) via Control Panel > Programs > Programs and Features. Docker Toolbox will also have installed Oracle VM VirtualBox, so uninstall that as well if you’re not using it for other purposes." }, + { + "objectID": "pages/quarto.html", + "href": "pages/quarto.html", + "title": "Working with Quarto", + "section": "", + "text": "The Quarto format (.qmd) is a multi-functional format, which is especially useful for scientific coding and analyses. Quarto documents can be used both to save and execute code as well as generating reports in various output formats. This is done by mixing markdown and so-called code chunks in the same document (we have course materials for markdown if you are unfamiliar with this format). The code itself as well as the output it generates can be included in the final report. Not only can Quarto work great for scientific coding, but can also be used for things such as presentation and websites - this entire workshop website is, in fact, created using only Quarto!\nQuarto makes your analysis more reproducible by connecting your code, figures and descriptive text. You can use it to make reproducible reports, rather than e.g. copy-pasting figures into a Word document. You can also use it as a notebook, in the same way as lab notebooks are used in a wet lab setting (or as we utilise Jupyter notebooks in the tutorial after this one). Quarto itself does not require any particular programming language to be installed - any language you want to use can be installed separately. The currently supported languages are R, Python, Julia and Observable. Quarto is fully compatible with both R Markdown and Jupyter documents.\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. 
Place yourself in the workshop-reproducible-research/tutorials/quarto/ directory, activate your quarto-env Conda environment and start your text editor or IDE of choice.\n\n\n\n\n\n\nA note on R Markdown\n\n\n\nQuarto is an evolution of the R Markdown format, which was previously used in this course. While R Markdown is a widely-used and excellent software for code and reports, Quarto is most easily thought of as “R Markdown 2.0”. If you’re familiar with R Markdown, you will find Quarto to be highly similar. The creators of both Quarto and R Markdown (Posit) have stated that R Markdown is not going to be deprecated, but most newer features will only come to Quarto. This means that if you’ve used R Markdown in the past now is a good time to make the switch, but you don’t have to. You can check out the Quarto website for more in-depth discussions regarding Quarto/R Markdown (dis-)similarities." + }, + { + "objectID": "pages/quarto.html#introduction", + "href": "pages/quarto.html#introduction", + "title": "Working with Quarto", + "section": "", + "text": "The Quarto format (.qmd) is a multi-functional format, which is especially useful for scientific coding and analyses. Quarto documents can be used both to save and execute code as well as generating reports in various output formats. This is done by mixing markdown and so-called code chunks in the same document (we have course materials for markdown if you are unfamiliar with this format). The code itself as well as the output it generates can be included in the final report. Not only can Quarto work great for scientific coding, but can also be used for things such as presentation and websites - this entire workshop website is, in fact, created using only Quarto!\nQuarto makes your analysis more reproducible by connecting your code, figures and descriptive text. You can use it to make reproducible reports, rather than e.g. copy-pasting figures into a Word document. You can also use it as a notebook, in the same way as lab notebooks are used in a wet lab setting (or as we utilise Jupyter notebooks in the tutorial after this one). Quarto itself does not require any particular programming language to be installed - any language you want to use can be installed separately. The currently supported languages are R, Python, Julia and Observable. Quarto is fully compatible with both R Markdown and Jupyter documents.\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. Place yourself in the workshop-reproducible-research/tutorials/quarto/ directory, activate your quarto-env Conda environment and start your text editor or IDE of choice.\n\n\n\n\n\n\nA note on R Markdown\n\n\n\nQuarto is an evolution of the R Markdown format, which was previously used in this course. While R Markdown is a widely-used and excellent software for code and reports, Quarto is most easily thought of as “R Markdown 2.0”. If you’re familiar with R Markdown, you will find Quarto to be highly similar. The creators of both Quarto and R Markdown (Posit) have stated that R Markdown is not going to be deprecated, but most newer features will only come to Quarto. This means that if you’ve used R Markdown in the past now is a good time to make the switch, but you don’t have to. You can check out the Quarto website for more in-depth discussions regarding Quarto/R Markdown (dis-)similarities." 
+ }, + { + "objectID": "pages/quarto.html#the-basics", + "href": "pages/quarto.html#the-basics", + "title": "Working with Quarto", + "section": "2 The basics", + "text": "2 The basics\nLet’s start with creating basic Quarto document that we can work with.\n\n2.1 Creating Quarto documents\nQuarto documents are just plain text files with the .qmd extension. Create a new file called e.g. quarto-tutorial.qmd and copy the following into it:\n---\ntitle: \"Untitled Quarto Document\"\nauthor: \"Jane Doe\"\nformat: html\n---\nThis is a so-called YAML header, which is where we specify the general settings of the document in the form of key: value. The title and author are just what they sound like, while the format field specifies what type of output you want the final report to be in (alternatives include pdf, revealjs and many others). Here we have specified that we want HTML output, which is perhaps the most useful for scientific computing.\n\nChange the title to My first Quarto document and the author to your name.\n\nLet’s add some actual content to the document, starting with some basic markdown:\n\nAdd some text into your Quarto document (including an empty line between the YAML header and the text), e.g. the following:\n\nThis is my first Quarto document!\n\n# This is a header\n\nThis is where I'll soon add some *code* related to the first header.\nLet’s see what this document looks like when it’s rendered into HTML by Quarto:\n\nGo to the command line and type quarto render quarto-tutorial.qmd.\n\n\n\n\n\n\n\nRendering\n\n\n\nIf you’re using e.g. RStudio or VSCode to edit your Quarto document you might have access to a render button, which means you don’t have to run the above command from the command line if you prefer.\n\n\nOpen your new quarto-tutorial.html file that was created and see what it looks like. It’s only markdown content so far, so let’s add some R code using a code chunk:\n```{r}\nSys.Date()\n```\nNotice that we delimit the code chunk from the rest of the document’s contents using three backticks (```) and specify the R language using curly brackets ({r}). The code itself just prints the current date.\n\nRender the document again and see what it looks like.\n\nYou can also name chunks by adding it after the language:\n```{r}\nSys.Date()\n```\nThis is useful for debugging when something has gone wrong, since it’ll be easier to see exactly which code chunk an error happened (instead of just showing the chunk as a number).\nWe can also get in-line code using {r} <R CODE>, like so:\nThe current date is `{r} Sys.Date()`.\n\nAdd the example above and render the document again to make sure it worked.\n\n\n\n2.2 Previewing documents\nQuarto has a highly useful command for when you’re working on a document: preview. It’s essentially a live preview of the document you’re working on that will automatically render when you introduce changes to the document.\n\nType quarto preview quarto-tutorial.qmd in the command line.\n\nYour default web browser should now have opened a new window with your rendered document, while your command line should say something like the following:\nWatching files for changes\nBrowse at http://localhost:4175/\nYou can’t type new commands at the moment, because the Quarto Preview command is still running - it’s watching for any new changes to the Quarto document you specified.\n\nChange or add some markdown text to your Quarto document, e.g. This is a code chunk instead of the previous text under the first header. 
Make sure you save the document.\n\nThe HTML document in your browser should have updated to reflect your newest changes automatically. Previewing documents is great when you want to have continuous feedback to the changes you make and can make the process of writing more seamless, since you don’t have to manually render all the time. Previewing will still render the entire document, however, meaning that if you have some heavy computations you might not want to re-render on every single save. For those cases you might instead prefer to stick with manual rendering when you are satisfied with multiple changes. You can abort a preview like any on-going command, e.g. using Ctrl-C.\nIn the rest of the tutorial it’s up to you whether you want to use preview or not - the tutorial will just mention when it’s time to render, you decide how that’s done.\n\n\n2.3 Rendering to PDF\nSo far we’ve only rendered to HTML, but sometimes you prefer a PDF. This entails changing the format option in the YAML header:\n\nChange the format to pdf in the header and render your document.\n\nYou can add any raw LaTeX commands you want to your document when you’re rendering to PDF, e.g. \\footnotsize to change the font size. You also have LaTeX-specific settings, such as setting the geometry for the whole document or specifying a citation method. While the details of LaTeX are outside the scope of this course, it’s useful to be aware of this functionality of Quarto so that you may use it if you already know LaTeX or if you want to learn it.\nSwitch back to HTML rendering before you move on.\n\n\n2.4 Languages\nThe examples so far have been using R, but we could just as easily have used Python. All we have to do is to change our code chunk to specify {python} as language and its content to be the equivalent Python code:\n```{python}\nfrom datetime import date\nprint(date.today())\n```\n\nChange the code chunk to the above Python chunk instead and render your document again.\n\n\n\n\n\n\n\nA note on Python in-line code\n\n\n\nQuarto support for in-line python code was added in version 1.4, so if you’re using an older version of Quarto simply remove the in-line code example. You can check your quarto version by running quarto --version on the commandline. As of this writing, the 1.4 version of Quarto can be obtained from the pre-release page: https://quarto.org/docs/download/prerelease\nIf you’re using Quarto version 1.4 or higher and want to try the in-line code example above for Python, change the line to:\nThe current date is `{python} date.strftime(date.today(), format=\"%Y-%m-%d\")`\n\n\nSo far we’ve had Quarto automatically determine which language engine should be used, which it detects through the code chunks we’ve written. We can also do this explicitly by adding engine: knitr or engine: jupyter to the YAML header.\n\nExplicitly add engine: jupyter to your YAML header and render the document.\n\n\n\n\n\n\n\nMaking sure your Jupyter engine is recognised\n\n\n\nQuarto attempts to identify a suitable Jupyter engine for your system when you include Python code chunks. However, if you want to use Jupyter available in a specific conda environment (e.g. your quarto-env environment) you need to take some extra steps. Please visit this link and follow steps 1-4. 
In the final step, check for the name of the kernel matching your quarto-env conda environment, e.g.\n$ jupyter kernelspec list\nAvailable kernels:\n...\n conda-env-quarto-env-py /Users/<your-user-name/Library/Jupyter/kernels/conda-env-quarto-env-py\nUsing the example output from above we can add the following to the YAML header of our Quarto document:\njupyter:\n kernelspec:\n display_name: Python 3\n language: python\n name: conda-env-quarto-env-py\n\n\nIt can be useful to explicitly set the language for the document, as it makes it clearer from just the YAML header what language will be used. There are also more language-related options for Quarto, but we’ll save those for later in the tutorial.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section you learned how to create, edit and render basic Quarto documents using different languages." + }, + { + "objectID": "pages/quarto.html#code-chunks", + "href": "pages/quarto.html#code-chunks", + "title": "Working with Quarto", + "section": "3 Code chunks", + "text": "3 Code chunks\nSometimes you want to add chunk options to the code chunks in your Quarto documents. They are also in YAML format and are prefixed with a special type of comment (#|). It can look something like this:\n```{python}\n#| echo: false\nfrom datetime import date\nprint(date.today())\n```\n\nAdd the chunk option above to your document and render the document again.\n\nNotice how we no longer see the code itself, just the output? This is because the echo option specifies just that: whether we see the code or not. There are a number of such chunk options that are useful to know about:\n\n\n\n\n\n\n\nChunk option\nEffect\n\n\n\n\necho\nInclude the chunk code in the output.\n\n\neval\nEvaluate the code chunk.\n\n\noutput\nInclude the results of executing the code in the output.\n\n\nwarning\nInclude warnings in the output.\n\n\nerror\nInclude errors in the output (note that this implies that errors executing code will not halt processing of the document).\n\n\ninclude\nPrevent both code and output from being included.\n\n\n\n\nCheck what happens if you change echo: False to eval: False.\n\nNow the code in the code chunk is not run, which means that if you previously added the python inline code it will no longer work because it depends on date from the datetime module that we import in the code chunk. Remove the inline code snippet if you added it. Then try rendering again. Now you should see the code itself but it won’t be run and therefore has no output.\n\n3.1 Figure options\nThere are also options related to figures, but for that we need to actually have some code that produces a figure.\n\nChange the YAML header to use R instead of Python, remove the Python code chunk and replace it with the following (don’t worry if you don’t understand the R code itself, it’s just as example):\n\n```{r}\nlibrary(\"ggplot2\")\nlibrary(\"palmerpenguins\")\ndata(penguins, package = \"palmerpenguins\")\nggplot(penguins, aes(x = bill_length_mm,\n y = body_mass_g,\n colour = species)) +\n geom_point(size = 2) +\n theme_bw() +\n labs(x = \"Bill length (mm)\",\n y = \"Body mass (g)\",\n colour = \"Species\") +\n ggtitle(\"Penguin weight and bill length\") +\n theme(plot.title = element_text(hjust = 0.5)) +\n scale_colour_manual(values = c(\"#c1dea0\", \"#85be42\", \"#425f21\"))\n```\nWhen you’ve rendered the document you should see both the code and a figure using the Palmer Penguins dataset. 
You should also see a warning along the lines of Removed 2 rows containing missing values.\n\nSuppress the warning by adding #| warning: false as a chunk option and render.\n\nThere are two chunk options related to figure sizes: fig-width and fig-height (expressed in inches). These allow you to experiment with your figures and make them look the way you want.\n\nAdd both the fig-width: 10 and fig-height: 5 chunk options and render.\n\n\n\n\n\n\n\nNote\n\n\n\nThese two chunk options are only available when using the Knitr engine, not for Jupyter. There is a way to set these for the whole document with Jupyter, though, which we’ll talk more about in the next section of the tutorial.\n\n\nYou can also add captions and alt text using fig-cap and fig-alt, respectively.\n\nAdd a suitable caption and alt text to the figure and render.\n\nIf you want to place the caption in the margin of your document you can use the cap-location chunk option.\n\nAdd cap-location: margin to your chunk options and render.\n\n\n\n\n\n\n\nNote\n\n\n\nOn some quarto versions the cap-location: option may not work as expected. If you experience this, try also adding #| label: fig-penguins to the chunk.\n\n\n\n\n3.2 Cross-references\nA convenient way to be able to refer to figures in text is by adding a figure label, which will automatically add a figure number before your caption.\n\nAdd a suitable label, e.g. label: fig-penguins to the chunk options.\n\nCross-references use the @ symbol and the corresponding label. You can thus write some markdown outside of a code chunk and refer to e.g. @fig-penguins, as per the example here. This is extremely useful if you’re writing a paper or a report where you want to refer to figures and content in the markdown text. Quarto even adds a clickable link to the figure itself as well!\n\n\n3.3 Sub-figures\nIt’s also possible to create sub-figures using Quarto, instead of using whatever plotting library that your created the figures with.\n\nAdd the following (almost identical) code at the bottom of the chunk you already have:\n\nggplot(penguins, aes(x = bill_depth_mm,\n y = body_mass_g,\n colour = species)) +\n geom_point(size = 2) +\n theme_bw() +\n labs(x = \"Bill depth (mm)\",\n y = \"Body mass (g)\",\n colour = \"Species\") +\n scale_colour_manual(values = c(\"#c1dea0\", \"#85be42\", \"#425f21\"))\n\nAlso add the following to the chunk options:\n\n#| fig-subcap:\n#| - Bill length vs. body mass\n#| - Bill depth vs. body mass\nYou should now see that we have two figures with separate sub-captions as well as the overall figure caption we previously added. We can also control the layout of these figures using the layout-ncol chunk option.\n\nAdd a layout-ncol: 2 chunk option and render the document.\n\nWe now have a different, two-column layout instead, but whether you prefer this or just a one-column layout is up to you.\n\n\n3.4 Tables\nTables work much in the same way as figures. 
It might, in our example, be nice to add a table with the data we previously plotted.\n\nAdd the following code chunk to your document and render it:\n\n```{r}\n#| label: tbl-penguins\n#| tbl-cap: Palmer penguins bill length, width and body mass.\n#| tbl-cap-location: margin\nknitr::kable(\n penguins[1:10, c(\"species\", \"bill_length_mm\", \"bill_depth_mm\", \"body_mass_g\")],\n col.names = c(\"Species\", \"Bill length (mm)\", \"Bill depth (mm)\", \"Body mass (g)\")\n)\n```\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section you learned several chunk, figure and table options, how cross-referencing works and how to add sub-figures." + }, + { + "objectID": "pages/quarto.html#document-options", + "href": "pages/quarto.html#document-options", + "title": "Working with Quarto", + "section": "4 Document options", + "text": "4 Document options\nSo far we’ve mostly worked with chunk options, which are specific to the chunk they appear in. You can set many of these at the global document level, however, and there are also some options specifically for tailoring the document as a whole, regardless of chunk content.\nWe’ve already looked at some global options, such as title, author, format and engine. Something that would go nicely with the first two is the date option. You could just write the actual date if you like, or you can use the today option:\n\nAdd the following to the options: date: today\n\n\n4.1 Code folding\nA useful option we haven’t touched already is the code-fold option. This and similar global options are specified nested inside the format option, like so:\nformat:\n html:\n code-fold: true\n\nAdd the code-fold option to your document and render it.\n\nThis can be a nice default to use in scientific reports, as it hides the code by default but is always there for those who want to inspect it. You can also use the code-summary chunk option to specify a different text to show with the folded code instead of the default Code, e.g. code-summary: Click to show code.\nIf you want to add the code-summary option to all chunks you can add the following to the yaml header:\nlanguage:\n code-summary: Click to show code\nYou can also add the code-tools option, which will add a drop-down menu to toggle visibility of all code as well as the ability to view the source of the document.\n\nAdd the code-tools: true option and render the document.\n\n\n\n4.2 Table of contents\nAnother useful document option is to add a table of contents, which can be done with the toc option. This will automatically populate the table of contents using the headers from your document.\n\nAdd some more headings and/or sub-headings to your document.\nAdd the toc: true option to the html format and render.\n\nThe table of contents is to the right of the document by default, but you can change it using toc-location. The toc-depth allows you to control how many sub-heading levels are included in the table of contents.\n\nAdd toc-location: left and toc-depth: 2 to your document and render it.\n\nHaving the table of contents on the left can be useful if you are using the margins for something, such as we are doing in this tutorial. You can similarly add section numbering using number-sections and number-depth. Smooth scrolling is not enabled by default, but you can add it using smooth-scroll: true. 
You can change the title of the table of contents using toc-title.\n\nAdd section numbers, depth, smooth scrolling and a different table of contents title to your document and render it.\n\n\n\n4.3 Themes\nQuarto has a lot of themes available for it.\n\nAdd theme: flatly under the HTML format option and render.\n\nIf you want to get real advanced you can play around with lots of details regarding the themes and adjust as you see fit, or even just create your own theme. This is a bit too advanced to go through here, but you can read about it more in the official documentation.\n\n\n4.4 Global chunk options\nThe chunk options we learnt about in the previous section of this tutorial can also be specified on the global document level. Instead of specifying e.g. warning: false or fig-height: 5 in individual chunks we can add it to the main YAML header in the same manner as for e.g. code folding or table of contents. We’ll still have to specify options like labels or captions at the chunk-level, though.\n\nAdd warning: false to your document header and remove it from the penguin figure chunk you already have.\n\n\n\n4.5 Embedding HTML resources\nWhen rendering HTML documents you get any figures and other resources in a <document-name>_files/ directory, which is not always desirable. It’s easier to move the HTML around if all figures etc. are embedded directly in the HTML itself, which can be done by specifying embed-resources: true in the HTML format options. This option is false by default, meaning that you’ll also have to include the previously mentioned directory if you want to share the HTML with anybody.\n\nRemove the <document-name>_files/ directory, refresh the rendered document and see what happens.\nAdd the embed_resources option and render your document again.\n\nWhat happened first is that your figures should have disappeared when you deleted the resources directory. Embedding resources and rendering again should not re-create this directory, so now you’ll just have a stand-alone HTML file that is more portable than before.\n\n\n4.6 Multiple formats\nSo far we’ve mostly been working with HTML output, but you don’t need to limit yourself to a single output format if you don’t want to.\n\nAdd the docx: default line in the format: part of your YAML header and render your document.\n\nYou should have gotten two separate output files now: a HTML and a DOCX (Word) file. You can specify further options for any of the formats you include, instead of just using the default settings as in this example.\n\nRender your document again, but supply the --to html flag.\n\nThis will only render to the specified output format, which is highly useful when you want to write a Quarto document with more than one format but not always render them all.\n\n\n4.7 Parameters\nThe last document-wide option we’ll touch on is parameters. This is useful for when you want to be able to run the same document with different parameters or options for some computations. How parameters are specified depends on which engine you’re using. With Knitr you can specify parameters using the params option:\n\nAdd the following code to your YAML header:\n\nparams:\n point_size: 2\n\nAlso change the hard-coded geom_point(size = 2) to geom_point(size = params$point_size) in the two ggplot calls in the first code chunk.\n\nWe have thus specified a parameter called point_size in the YAML header and referred to it in the code using params$point_size. 
You can now change this parameter at run-time by supplying the -P <param>:<value> (or --execute-param) flag to quarto render.\nNotice that this won’t work if you want to use a parameter to control e.g. a chunk option like layout-ncol. For this we need to use an in-line code expression: #| layout-ncol: !expr params$ncols.\n\nAdd a parameter for the layout-ncol chunk option to the YAML header\nAlso add the layout-ncol chunk option to the figure chunk using the syntax above and render to make sure it works.\n\nNote that to modify multiple parameters at run-time you have to use the -P param:value flag multiple times, like so:\nquarto render quarto-tutorial.qmd -P point_size:4 -P ncols:1\nIf you’re using the Jupyter engine you can instead specify parameters by designating a single cell as a parameter cell, like so:\n```{python}\n#| tags: [parameters]\npoint_size = 2\n```\nYou can also specify parameters in a params.yml file and instruct quarto to use them with the --execute-params params.yml flag when rendering. Note that the parameters must be defined in the document (in the YAML header when using the knitr engine, or in a cell when using the jupyter engine). Pointing quarto to a params.yml file with --execute-params only overrides them when rendering.\nUsing parameters is extremely useful when you’re using a workflow manager system (e.g. Snakemake or Nextflow), since you can easily specify sample-specific parameters from the command line directly from your workflow manager.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this sections we covered a number of document-wide options, including code-folding, table of contents, theming, HTML portability, using multiple output formats and parameters." + }, + { + "objectID": "pages/quarto.html#presentations", + "href": "pages/quarto.html#presentations", + "title": "Working with Quarto", + "section": "5 Presentations", + "text": "5 Presentations\nQuarto can also be used to create presentations in multiple formats such as reveal.js (HTML), beamer (PDF) and pptx (PowerPoint) - the most powerful of these formats by far is the first one. Creating presentations with Quarto is quite similar to creating general Quarto documents, with some added features to keep in mind.\n\n5.1 Slides\nThe first thing that’s needed for creating a presentation is deciding what constitutes a slide. The default is that slides are delimited by a document’s header levels.\n\nRender your document using the --to revealjs flag and open it.\n\nYou should now have the same document we’ve been working on for this tutorial in presentation format! You can step through the slides using the arrow keys, press F to go into full-screen mode, S to view speaker notes, M for the menu (you can also click in the lower left corner to get this menu) and ESC to go back.\nIf you’ve followed along you should have one level-1 header (#) and two level-2 headers (##). Notice that the level-1 header here will render as a blank page with just the header content on it, while the level-2 headers will render as normal slide headers. This all looks quite nice, and we didn’t even have to change a thing! Disregard that the table on the last slide doesn’t fit for now, we’ll get back to it later. 
Another method of delimiting slides is using a horizontal rule, ---, which allows you more fine-grained control over slides and their content (and is especially useful if you want to have a slide without a title).\n\n\n5.2 Divisions\nThere are many ways you can add presentation-specific content to your slides, some of which you’d recognise from e.g. PowerPoint functionality.\nSo called “divisions” or “divs” allow you to control the appearance of content in your slides.\nLet’s fix that issue with the table that was larger than the page. The problem here is one of content overflow, which can be fixed by adding a special {.smaller} div.\n\n\n\n\n\n\nNote\n\n\n\nDivs do not work for level1 headings (starting with a single #).\n\n\n\nAdd the {.smaller} div to the table header (it should read something like ## A table {.smaller}) and render.\n\nThat should have automatically re-sized the table to fit into the slide. Another way to solve this is to make slide content scrollable.\n\nChange the {.smaller} div to a {.scrollable} div and render.\n\nInstead of re-sizing the table we now get the ability to scroll down it instead; whichever solution you prefer is up to you.\nAdding divisions of various types like this is a common thing for Quarto presentations. Another common presentation-functionality is incremental lists, which can also be achieved with divisions. When adding a division to slide content we specify the division’s content in a manner similar to a code chunk, like in the following example:\n## Penguin species\n\n::: {.incremental}\n - Adelie\n - Chinstrap\n - Gentoo\n:::\n\nAdd the code above to your document and render it.\n\nStepping through incremental content works the same as for stepping through slides, i.e. using the arrow keys.\n\nRender your document to html instead of revealjs.\n\nNotice that Quarto rendered the HTML document just fine, even though you now have some presentation-specific code? This allows you to switch between the formats on-demand without having much overhead or format-specific code, which is great when you want to present your work without having to whip out a full-fledged presentation and all the work that goes into that!\nThere are other useful divisions as well, including {.notes} (speaker notes), {.aside} (additional commentary similar to footnotes), {.footer} (slide footers), which you can add in the same way as we did for the incremental list above.\n\nPick one of the above-mentioned divisions to add to your presentation and render it.\n\n\n\n\n\n\n\nNote\n\n\n\nThe notes and footer divisions will appear as normal Markdown text when rendering to HTML, while asides will appear in the margin. These divisions thus represent cases you might want to avoid if you want to be completely format-agnostic.\n\n\n\n\n5.3 Presentation options\nJust like the other formats you can specify presentation-specific options at the document-level using the YAML header. You could, for example, add the {.scrollable} or {.smaller} div to the entire document.\n\nAdd the revealjs format to the YAML header as well as a scrollable: true option to it.\n\nYou can also specify one of the built-in themes here.\n\nAdd theme: simple to your YAML header and render.\n\nYou can find the entire list of themes at the Quarto website.\n\n\n5.4 Multiple columns\nSometimes you’ll want to have more than one column in your presentation, which is done with the {.columns} and {.column} divisions. 
The former specifies that a section with multiple columns is starting, while the second specifies when each column starts, like so:\n:::: {.columns}\n\n::: {.column}\nLeft column\n:::\n\n::: {.column}\nRight column\n:::\n\n::::\n\nAdd multiple columns with some content to your presentation and render it.\n\nYou can also control the widths of these columns using e.g. {.column width=\"40%\"}.\n\n\n\n\n\n\nNote\n\n\n\nThe {.columns} div also works for a normal HTML render, so it’ll look the same regardless of whether you output as a document or a presentation.\n\n\n\n\n5.5 Fragments\nWe’ve already learnt how to get incremental lists working, but what about general content we want to incrementally step through? This is done with the {.fragment} div.\n\nAdd a {.fragment} div to some slide content and render.\n\nFragments are similar to “animations” from PowerPoint and come with lots of built-in variations, e.g. fade-out, grow, strike and several others.\n\nAdd a fragment variant to your content, e.g. {.fragment .grow} and render your document.\n\nYou can also control the order in which fragments appear using the fragment-index=<NUMBER> option.\n\nCreate a new slide and add some content with a different order of appearance than the order of the code. If you need help or inspiration, click below.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n## Why Palmer Penguins?\n\n::: {.fragment fragment-index=2}\n![](https://allisonhorst.github.io/palmerpenguins/logo.png){fig-align=\"center\"}\n:::\n\n::: {.fragment fragment-index=1}\nThe goal of `palmerpenguins` is to provide a good dataset for data exploration\nand visualization, as an alternative to `iris.`\n:::\n\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we covered how to create presentations using Quarto, including how to add various divisions, global slide-options, multiple columns and fragments." + }, + { + "objectID": "pages/quarto.html#extra-material", + "href": "pages/quarto.html#extra-material", + "title": "Working with Quarto", + "section": "6 Extra material", + "text": "6 Extra material\nThe following material contains some more advanced things that you can do with Quarto but are not really part of the core of the Quarto material. It’s a mix of various functionalities, and you don’t have to go through it if you don’t want to.\nIf you’re interested in learning more about Quarto in general, here are some reading tips:\n\nThe Quarto documentation\nA gallery of Quarto examples\nAn awesome list of Quarto content\n\n\n6.1 Tabsets\nSometimes you’ll want to present the same content in different ways, e.g. the equivalent code in different languages. Look at the following toy example:\n::: {.panel-tabset}\n## R\n```{r}\nwords <- c(\"Foo\", \"bar\")\nprint(paste(words), collapse = ' ')\n```\n\n## Python\n```{python}\nwords = [\"Foo\", \"bar\"]\nprint(' '.join(words))\n```\n:::\nTry adding that to a document and see that you’ll get a set of tabs that change the content of the code chunk to the respective language. This is not only useful for showing different languages, but can be used for other situations as well. For example, you might want to run different analyses and show them in different tabs, or even show different interactive elements in separate tabs.\n\n\n6.2 Callouts\nIf you’re writing some sort of documentation, tutorial or just want to draw special attention to something, callouts are here for you. They render as a coloured block with a header and content. There are five types of callouts: note, tip, warning, caution, and important. 
As with lots of Quarto things they are specified using a division, like so:\n::: {.callout-note}\nThis is a note callout.\n:::\nThe different callouts come with appropriate colours by default, which you can change in the theme. You can also have collapsible callouts by adding the collapse=true option, where true will have the callout collapsed by default. You can also specify titles in the same way using the title=<TITLE> option or by adding the title directly to the callout content, like so:\n::: {.callout-note}\n## This is the callout title\n\nThis is a note callout.\n:::\nYou can change the overall appearance of callouts by using the appearance option or the callout-appearance global option. Valid values are default, simple and minimal, with decreasing usage of colours and weights. You can also suppress the callout icons using icon=false or callout-icon: false in a similar manner.\n\n\n6.3 Mixing R and Python\nEarlier in the tutorial we showed how to change the language using the engine global option, but there is actually a way to use both R and Python in the same Quarto document. This is done via the Knitr engine and the reticulate R package, which allows communication between any variables and data you store in either R or Python code chunks. While this may not be that common of a use-case, it’s still great that it’s there for those that want access to it. We won’t go through the details of how this works here, but you’re welcome to go and check out the official reticulate website for yourself.\nIf you just want to mix R and Python in a single Quarto document without the interoperability between the languages it’s a lot simpler, though. You can either just install the reticulate package (r-reticulate in Conda) or add the python.reticulate=FALSE chunk option to the Python chunks.\n\n\n6.4 Citations\nYou can actually write whole articles in Quarto! For that purpose, it’s also great that you can cite things from a bibliography as well. Specifying the bibliography file(s) is done using the bibliography global option; specifying the citation style can be done using a csl (Citation Style Language) file and the csl global option. Citation itself is similar to cross-referencing (@cross-ref), but is surrounded by square brackets: [@citation]. You can read more details about citations at the Quarto website." + }, + { + "objectID": "pages/markdown.html", + "href": "pages/markdown.html", + "title": "Markdown", + "section": "", + "text": "A markup language is a system for annotating text documents in order to e.g. define formatting. HTML, if you are familiar with that, is an example of a markup language. HTML uses tags, such as:\n<h1> Heading </h1>\n<h2> Sub-heading </h2>\n<a href=\"www.webpage.com\"> Link </a>\n<ul>\n <li> List-item1 </li>\n <li> List-item2 </li>\n <li> List-item3 </li>\n</ul>\nMarkdown is a lightweight markup language which uses plain-text syntax in order to be as unobtrusive as possible, so that a human can easily read it. Look at the following toy example:\n# A header\n\nA [link](http://example.com).\n\n## A sub-header\n\nText attributes _italic_, *italic*, **bold**, `monospace`.\n\n### A deeper sub-header\n\nBullet list:\n\n - Apples\n - Oranges\n - Pears\nThis would render to something like this:\n\nA markdown document can be converted to other formats, such as HTML or PDF, for viewing in a browser or a PDF reader; in fact, the page you are reading right now is written in markdown. 
Markdown is somewhat ill-defined, and as a consequence of that there exist many implementations and extensions. They share most of the syntax, however, with various additions on top.\nThere are a lot more things you can do with markdown than what we show here. Indeed, this entire course is mostly written in markdown! You can read more about markdown here." + }, { "objectID": "pages/introduction.html", "href": "pages/introduction.html", @@ -55,6 +111,69 @@ "section": "The case study", "text": "The case study\nWe will be running a small bioinformatics project as a case study, and use that to exemplify the different steps of setting up a reproducible research project. To give you some context, the study background and analysis steps are briefly described below.\n\nBackground\nThe data is taken from Osmundson, Dewell, and Darst (2013), who have studied methicillin-resistant Staphylococcus aureus (MRSA). MRSA is resistant to broad spectrum beta-lactam antibiotics and lead to difficult-to-treat infections in humans. Lytic bacteriophages have been suggested as potential therapeutic agents, or as the source of novel antibiotic proteins or peptides. One such protein, gp67, was identified as a transcription-inhibiting transcription factor with an antimicrobial effect. To identify S. aureus genes repressed by gp67, the authors expressed gp67 in S. aureus cells. RNA-seq was then performed on three S. aureus strains:\n\nRN4220 with pRMC2 with gp67\nRN4220 with empty pRMC2\nNCTC8325-4\n\n\n\nAnalysis\nThe graph below shows the different steps of the analysis that are included in this project:\n\nThe input files are:\n\nRNA-seq raw data (FASTQ files) for the three strains\nS. aureus genome sequence (a FASTA file)\nS. aureus genome annotation (a GFF file)\n\nThe workflow itself will perform the following tasks:\n\nDownloading and indexing of the reference genome using Bowtie2\nDownloading the raw FASTQ data from the Sequence Read Archive (SRA)\nRun some quality controls on the data using FastQC and MultiQC\nAlign the raw data to the genome and calculate the gene expression using featureCounts\nProduce supplementary materials using data from quality controls, gene expression and the workflow figure shown above" }, + { + "objectID": "pages/containers.html", + "href": "pages/containers.html", + "title": "Controlling your environment with Containers", + "section": "", + "text": "Container-based technologies are designed to make it easier to create, deploy, and run applications by isolating them in self-contained software units (hence their name). The idea is to package software and/or code together with everything it needs (other packages it depends, various environment settings, etc.) into one unit, i.e. a container. This way we can ensure that the software or code functions in exactly the same way regardless of where it’s executed. Containers are in many ways similar to virtual machines but more lightweight. Rather than starting up a whole new operating system, containers can use the same kernel (usually Linux) as the system that they’re running on. This makes them much faster and smaller compared to virtual machines. While this might sound a bit technical, actually using containers is quite smooth and very powerful.\nContainers have also proven to be a very good solution for packaging, running and distributing scientific data analyses. Some applications of containers relevant for reproducible research are:\n\nWhen publishing, package your analyses in a container image and let it accompany the article. 
This way interested readers can reproduce your analysis at the push of a button.\nPackaging your analysis in a container enables you to develop on e.g. your laptop and seamlessly move to cluster or cloud to run the actual analysis.\nSay that you are collaborating on a project and you are using Mac while your collaborator is using Windows. You can then set up a container image specific for your project to ensure that you are working in an identical environment.\n\nOne of the largest and most widely used container-based technologies is Docker. Just as with Git, Docker was designed for software development but is rapidly becoming widely used in scientific research. Another container-based technology is Apptainer (and the related Singularity), which was developed to work well in computer cluster environments such as Uppmax. We will cover both Docker and Apptainer in this course, but the focus will be be on the former (since that is the most widely used and runs on all three operating systems).\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to install Docker if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/containers.\n\n\n\n\n\n\nDockage and storage\n\n\n\nDocker images tend to take up quite a lot of space. In order to do all the exercises in this tutorial you need to have ~10 GB available." + }, + { + "objectID": "pages/containers.html#introduction", + "href": "pages/containers.html#introduction", + "title": "Controlling your environment with Containers", + "section": "", + "text": "Container-based technologies are designed to make it easier to create, deploy, and run applications by isolating them in self-contained software units (hence their name). The idea is to package software and/or code together with everything it needs (other packages it depends, various environment settings, etc.) into one unit, i.e. a container. This way we can ensure that the software or code functions in exactly the same way regardless of where it’s executed. Containers are in many ways similar to virtual machines but more lightweight. Rather than starting up a whole new operating system, containers can use the same kernel (usually Linux) as the system that they’re running on. This makes them much faster and smaller compared to virtual machines. While this might sound a bit technical, actually using containers is quite smooth and very powerful.\nContainers have also proven to be a very good solution for packaging, running and distributing scientific data analyses. Some applications of containers relevant for reproducible research are:\n\nWhen publishing, package your analyses in a container image and let it accompany the article. This way interested readers can reproduce your analysis at the push of a button.\nPackaging your analysis in a container enables you to develop on e.g. your laptop and seamlessly move to cluster or cloud to run the actual analysis.\nSay that you are collaborating on a project and you are using Mac while your collaborator is using Windows. You can then set up a container image specific for your project to ensure that you are working in an identical environment.\n\nOne of the largest and most widely used container-based technologies is Docker. Just as with Git, Docker was designed for software development but is rapidly becoming widely used in scientific research. 
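If you want to check that your Docker installation works before diving into the exercises, a quick sanity check might look like the following sketch (hello-world is a tiny test image provided by Docker and is not part of the course material):

```bash
# Print the installed Docker version
docker --version

# Download and run a minimal test image, removing the container afterwards
docker run --rm hello-world
```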
Another container-based technology is Apptainer (and the related Singularity), which was developed to work well in computer cluster environments such as Uppmax. We will cover both Docker and Apptainer in this course, but the focus will be be on the former (since that is the most widely used and runs on all three operating systems).\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to install Docker if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/containers.\n\n\n\n\n\n\nDockage and storage\n\n\n\nDocker images tend to take up quite a lot of space. In order to do all the exercises in this tutorial you need to have ~10 GB available." + }, + { + "objectID": "pages/containers.html#the-basics", + "href": "pages/containers.html#the-basics", + "title": "Controlling your environment with Containers", + "section": "2 The basics", + "text": "2 The basics\nWe’re almost ready to start, just one last note on nomenclature. You might have noticed that we sometimes refer to “Docker images” and sometimes to “Docker containers”. We use images to start containers, so containers are simply an instances of an image. You can have an image containing, say, a certain Linux distribution, and then start multiple containers running that same OS.\n\n\n\n\n\n\nRoot privileges are required\n\n\n\nIf you don’t have root privileges you have to prepend all Docker commands with sudo.\n\n\n\n2.1 Downloading images\nDocker containers typically run Linux, so let’s start by downloading an image containing Ubuntu (a popular Linux distribution that is based on only open-source tools) through the command line.\ndocker pull ubuntu:latest\nYou will notice that it downloads different layers with weird hashes as names. This represents a very fundamental property of Docker images that we’ll get back to in just a little while. The process should end with something along the lines of:\nStatus: Downloaded newer image for ubuntu:latest\ndocker.io/library/ubuntu:latest\nLet’s take a look at our new and growing collection of Docker images:\ndocker image ls\nThe Ubuntu image should show up in this list, with something looking like this:\nREPOSITORY TAG IMAGE ID CREATED SIZE\nubuntu latest d70eaf7277ea 3 weeks ago 72.9MB\n\n\n2.2 Running containers\nWe can now start a container from the image we just downloaded. We can refer to the image either by “REPOSITORY:TAG” (“latest” is the default so we can omit it) or “IMAGE ID”. The syntax for docker run is docker run [OPTIONS] IMAGE [COMMAND] [ARG...]. To see the available options run docker run --help. The COMMAND part is any command that you want to run inside the container, it can be a script that you have written yourself, a command line tool or a complete workflow. The ARG part is where you put optional arguments that the command will use.\nLet’s run uname -a to get some info about the operating system. In this case, uname is the COMMAND and -a the ARG. This command will display some general info about your system, and the -a argument tells uname to display all possible information.\nFirst run it on your own system (use systeminfo if you are on Windows):\nuname -a\nThis should print something like this to your command line:\nDarwin liv433l.lan 15.6.0 Darwin Kernel Version 15.6.0: Mon Oct 2 22:20:08 PDT 2017; root:xnu-3248.71.4~1/RELEASE_X86_64 x86_64\nSeems like I’m running the Darwin version of MacOS. 
Then run it in the Ubuntu Docker container:\ndocker run ubuntu uname -a\nHere I get the following result:\nLinux 24d063b5d877 5.4.39-linuxkit #1 SMP Fri May 8 23:03:06 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux\nAnd now I’m running on Linux! What happens is that we use the downloaded ubuntu image to run a container that has Ubuntu as the operating system, and we instruct Docker to execute uname -a to print the system info within that container. The output from the command is printed to the terminal.\nTry the same thing with whoami instead of uname -a.\n\n\n2.3 Running interactively\nSo, seems we can execute arbitrary commands on Linux. This looks useful, but maybe a bit limited. We can also get an interactive terminal with the flags -it.\ndocker run -it ubuntu\nYour prompt should now look similar to:\nroot@1f339e929fa9:/#\nYou are now using a terminal inside a container running Ubuntu. Here you can do whatever; install, run, remove stuff. Anything you do will be isolated within the container and never affect your host system.\nNow exit the container with exit.\n\n\n2.4 Containers inside scripts\nOkay, so Docker lets us work in any OS in a quite convenient way. That would probably be useful on its own, but Docker is much more powerful than that. For example, let’s look at the shell part of the index_genome rule in the Snakemake workflow for the MRSA case study:\nshell:\n \"\"\"\n bowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}\n \"\"\"\nYou may have seen that one can use containers through both Snakemake and Nextflow if you’ve gone through their tutorial’s extra material, but we can also use containers directly inside scripts in a very simple way. Let’s imagine we want to run the above command using containers instead. How would that look? It’s quite simple, really: first we find a container image that has bowtie2 installed, and then prepend the command with docker run <image>.\nFirst of all we need to download the genome to index though, so run:\ncurl -o NCTC8325.fa.gz ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz\ngunzip -c NCTC8325.fa.gz > tempfile\nTo download and prepare the input for Bowtie2.\nNow try running the following Bash code:\ndocker run -v $(pwd):/analysis quay.io/biocontainers/bowtie2:2.5.1--py39h3321a2d_0 bowtie2-build /analysis/tempfile /analysis/NCTC8325\nDocker will automatically download the container image for Bowtie2 version 2.5.1 from the remote repository https://quay.io/repository/biocontainers/bowtie2 and subsequently run the command! This is the docker run [OPTIONS] IMAGE [COMMAND] [ARG...] syntax just like before. In this case quay.io/biocontainers/bowtie2:2.5.1--py39h3321a2d_0 is the IMAGE but instead of first downloading and then running it we point to its remote location directly, which will cause Docker to download it on the fly. The bowtie2-build part is the COMMAND followed by the ARG (the input tempfile and the output index)\nThe -v $(pwd):/analysis part is the OPTIONS which we use to mount the current directory inside the container in order to make the tempfile input available to Bowtie2. 
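To tie this back to the idea of using containers inside scripts, the same command can simply be placed in a small shell script. This is just a sketch: the script name run_bowtie2_build.sh is made up, but the image and command are the ones used above.

```bash
#!/bin/bash
# run_bowtie2_build.sh: index a genome with Bowtie2 via Docker instead of a local install
set -euo pipefail

# Mount the current directory so the container can read tempfile and write the index files
docker run -v "$(pwd)":/analysis \
    quay.io/biocontainers/bowtie2:2.5.1--py39h3321a2d_0 \
    bowtie2-build /analysis/tempfile /analysis/NCTC8325
```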
More on these so-called “Bind mounts” in Section 4 of this tutorial.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use docker pull for downloading remotely stored images\nHow to use docker image ls for getting information about the images we have on our system.\nHow to use docker run for starting a container from an image.\nHow to use the -it flag for running in interactive mode.\nHow to use Docker inside scripts." + }, + { + "objectID": "pages/containers.html#building-images", + "href": "pages/containers.html#building-images", + "title": "Controlling your environment with Containers", + "section": "3 Building images", + "text": "3 Building images\nIn the previous section we downloaded a Docker image of Ubuntu and noticed that it was based on layers, each with a unique hash as id. An image in Docker is based on a number of read-only layers, where each layer contains the differences to the previous layers. If you’ve done the Git tutorial this might remind you of how a Git commit contains the difference to the previous commit. The great thing about this is that we can start from one base layer, say containing an operating system and some utility programs, and then generate many new images based on this, say 10 different project-specific images. This dramatically reduces the storage space requirements. For example, Bioconda (see the Conda tutorial) has one base image and then one individual layer for each of the more than 3000 packages available in Bioconda.\nDocker provides a convenient way to describe how to go from a base image to the image we want by using a “Dockerfile”. This is a simple text file containing the instructions for how to generate each layer. Docker images are typically quite large, often several GBs, while Dockerfiles are small and serve as blueprints for the images. It is therefore good practice to have your Dockerfile in your project Git repository, since it allows other users to exactly replicate your project environment.\nWe will be looking at a Dockerfile called Dockerfile_slim that is located in your containers directory (where you should hopefully be standing already). We will now go through that file and discuss the different steps and what they do. After that we’ll build the image and test it out. Lastly, we’ll start from that image and make a new one to reproduce the results from the Conda tutorial.\n\n3.1 Understanding Dockerfiles\nHere are the first few lines of Dockerfile_slim. Each line in the Dockerfile will typically result in one layer in the resulting image. The format for Dockerfiles is INSTRUCTION arguments. A full specification of the format, together with best practices, can be found here.\nFROM condaforge/miniforge3\n\nLABEL description = \"Minimal image for the NBIS reproducible research course.\"\nMAINTAINER \"John Sundh\" john.sundh@scilifelab.se\nHere we use the instructions FROM, LABEL and MAINTAINER. While LABEL and MAINTAINER is just meta-data that can be used for organizing your various Docker components the important one is FROM, which specifies the base image we want to start from. Because we want to use conda to install packages we will start from an image from the conda-forge community that has conda pre-installed. This image was in turn built using a Dockerfile as a blueprint and then uploaded to Dockerhub. The conda-forge community keeps the Dockerfile in a git repository and you can view the file here. 
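As a small aside before looking at that file: you can inspect the layer structure of any image you already have locally with docker history. A sketch, using the Ubuntu image pulled earlier in this tutorial:

```bash
# Show the layers that make up the ubuntu:latest image, including their sizes
docker history ubuntu:latest
```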
You will see that it starts from an official Ubuntu image (check the first line with the FROM instruction), followed by code to install various packages including conda.\n\n\n\n\n\n\nThere are many roads to Rome\n\n\n\nWhen it comes to choosing the best image to start from there are multiple routes you could take. Say you want to run RStudio in a Conda environment through a Jupyter notebook. You could then start from one of the rocker images for R, a Condaforge image, or a Jupyter image. Or you just start from one of the low-level official images and set up everything from scratch.\n\n\nLet’s take a look at the next section of Dockerfile_slim.\n# Use bash as shell\nSHELL [\"/bin/bash\", \"--login\", \"-c\"]\n\n# Set workdir\nWORKDIR /course\n\n# Set time zone\nENV TZ=\"Europe/Stockholm\"\nENV DEBIAN_FRONTEND=noninteractive\nSHELL simply sets which shell to use and WORKDIR determines the directory the container should start in. The ENV instruction is used to set environmental variables and here we use it to set the time zone by declaring a TZ variable. The DEBIAN_FRONTEND=noninteractive line means that we force the subsequent installation to not prompt us to set the time zone manually.\nThe next few lines introduce the important RUN instruction, which is used for executing shell commands:\n# Install package for setting time zone\nRUN apt-get update && apt-get install -y tzdata && apt-get clean\n\n# Configure Conda\nRUN conda init bash && conda config --set channel_priority strict && \\\n conda config --append channels bioconda && \\\n conda config --append channels r && \\\n conda config --set subdir linux-64\nThe first RUN command installs the tzdata package for managing local time settings in the container. This may not always be required for your Dockerfile but it’s added here because some R packages used in the course require it.\n\n\n\n\n\n\nNote\n\n\n\nWhile installing things with apt-get inside Dockerfiles is relatively common practice, it’s important to note that this may affect reproducibility, since it’s not common to specify an exact version. The packages installed in this manner are, however, usually not important for the actual analyses performed, but rather help in the building of the container image itself. While not critical, it’s important to note this from a reproducibility perspective.\n\n\nNext, we run conda init bash to initialize the bash shell inside the image, meaning we can use conda activate in containers that run from the image. In the same RUN statement we also configure the strict channel priority and add appropriate channels with conda config. You’ll probably recognize this from the pre-course-setup. The last part sets the somewhat obscure subdir config parameter pointing to the linux-64 architecture of conda channels.\nAs a general rule, you want each layer in an image to be a “logical unit”. For example, if you want to install a program the RUN command should both retrieve the program, install it and perform any necessary clean up. This is due to how layers work and how Docker decides what needs to be rerun between builds. More on this later.\nNext up is:\n# Open port for running Jupyter Notebook\nEXPOSE 8888\n\n# Start Bash shell by default\nCMD /bin/bash\nEXPOSE opens up the port 8888, so that we can later run a Jupyter Notebook server on that port. CMD is an interesting instruction. It sets what a container should run when nothing else is specified, i.e. if you run docker run [OPTIONS] [IMAGE] without the additional [COMMAND] [ARG]. 
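In other words, once this image is built (see the next section), the difference might look like the following sketch, where my_docker_image is the name we will give the image and conda is simply a program known to exist in it:

```bash
# No COMMAND given: the CMD from the Dockerfile (a Bash shell) is executed
docker run -it my_docker_image

# A COMMAND is given: it overrides CMD for this particular run
docker run my_docker_image conda --version
```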
It can be used for example for printing some information on how to use the image or, as here, to start a Bash shell for the user. If the purpose of your image is to accompany a publication, then CMD could be to run the workflow that generates the paper figures from raw data, e.g. CMD snakemake -s Snakefile -c 1 generate_figures.\n\n\n3.2 Building from Dockerfiles\nNow we understand how a Dockerfile works. Constructing the image itself from the Dockerfile can be done as follows - try it out:\n\n\n\n\n\n\nImage platforms on newer Macs\n\n\n\nIf your computer is a Mac with the M1 chip, you may have to add --platform linux/x86_64 to the docker build command.\n\n\ndocker build -f Dockerfile_slim -t my_docker_image .\nThis should result in something similar to this:\n [+] Building 2.2s (7/7) FINISHED\n => [internal] load build definition from Dockerfile_slim 0.0s\n => => transferring dockerfile: 667B 0.0s\n => [internal] load .dockerignore 0.0s\n => => transferring context: 2B 0.0s\n => [internal] load metadata for docker.io/condaforge/miniforge3:latest 0.0s\n => [1/3] FROM docker.io/condaforge/miniforge3 0.0s\n => CACHED [2/3] WORKDIR /course 0.0s\n => [3/3] RUN conda init bash && conda config --set channel_priority strict && conda config --append channels bioconda && conda config --append channels r && conda config --set subdir 2.1s\n => exporting to image 0.0s\n => => exporting layers 0.0s\n => => writing image sha256:53e6efeaa063eadf44c509c770d887af5e222151f08312e741aecc687e6e8981 0.0s\n => => naming to docker.io/library/my_docker_image\nExactly how the output looks depends on which version of Docker you are using. The -f flag sets which Dockerfile to use and -t tags the image with a name. This name is how you will refer to the image later. Lastly, the . is the path to where the image should be built (. means the current directory). This has no real impact in this case, but it matters if you want to import files. Validate with docker image ls that you can see your new image.\n\n\n3.3 Creating your own Dockerfile\nNow it’s time to make your own Dockerfile to reproduce the results from the Conda tutorial. If you haven’t done the tutorial, it boils down to creating a Conda environment file, setting up that environment, downloading three RNA-seq data files, and running FastQC on those files. We will later package and run the whole RNA-seq workflow in a Docker container, but for now we keep it simple to reduce the size and time required.\nThe Conda tutorial uses a shell script, run_qc.sh, for downloading and running the analysis. A copy of this file should also be available in your current directory. If we want to use the same script we need to include it in the image. A basic outline of what we need to do is:\n\nCreate a file called Dockerfile_conda\nStart the image from the my_docker_image we just built\nInstall the package fastqc which is required for the analysis.\nAdd the run_qc.sh script to the image\nSet the default command of the image to run the run_qc.sh script.\n\nWe’ll now go through these steps in more detail. Try to add the corresponding code to Dockerfile_conda on your own, and if you get stuck you can click to reveal the solution below under “Click to show solution”.\nSet image starting point\nTo set the starting point of the new image, use the FROM instruction and point to my_docker_image that we built in the previous Building from Dockerfiles step.\nInstall packages\nUse the RUN instruction to install the package fastqc=0.11.9 with conda. Here there are several options available. 
For instance we could add an environment file e.g. environment.yml from the Conda tutorial and use conda env create to create an environment from that file. Or we could create an environment directly with conda create. We’ll try this later option here, so add a line that will create an environment named project_mrsa containing the fastqc package, and also clean up packages and cache after installation. Use the -y flag to conda create to avoid the prompt that expects an interaction from the user.\nIn order to have the project_mrsa environment activated upon start-up we need to add two more lines to the Dockerfile. First we need to use a RUN instruction to run echo \"source activate project_mrsa\" >> ~/.bashrc, and then we need to use the ENV instruction to set the $PATH variable inside the image to /opt/conda/envs/project_mrsa/bin:$PATH.\nAdd the analysis script\nUse the COPY instruction to Add run_qc.sh to the image. The syntax is COPY SOURCE TARGET. In this case SOURCE is the run_qc.sh script and TARGET is a path inside the image, for simplicity it can be specified with ./.\nSet default command\nUse the CMD instruction to set the default command for the image to bash run_qc.sh.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nFROM my_docker_image\n\nRUN conda create -y -n project_mrsa -c bioconda fastqc=0.11.9 && conda clean -a\n\nRUN echo \"source activate project_mrsa\" >> ~/.bashrc\n\nENV PATH=/opt/conda/envs/project_mrsa/bin:$PATH\n\nCOPY run_qc.sh .\n\nCMD bash run_qc.sh\n\n\n\nBuild the image and tag it my_docker_conda (remember to add --platform linux/x86_64 to the build command if you are using a Mac with the Apple chip).\ndocker build -t my_docker_conda -f Dockerfile_conda .\nVerify that the image was built using docker image ls.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow the keywords FROM, LABEL, MAINTAINER, RUN, ENV, SHELL, WORKDIR, and CMD can be used when writing a Dockerfile.\nHow to use docker build to construct and tag an image from a Dockerfile.\nHow to create your own Dockerfile." + }, + { + "objectID": "pages/containers.html#managing-containers", + "href": "pages/containers.html#managing-containers", + "title": "Controlling your environment with Containers", + "section": "4 Managing containers", + "text": "4 Managing containers\nWhen you start a container with docker run it is given an unique id that you can use for interacting with the container. Let’s try to run a container from the image we just created:\ndocker run my_docker_conda\nIf everything worked run_qc.sh is executed and will first download and then analyse the three samples. Once it’s finished you can list all containers, including those that have exited.\ndocker container ls --all\nThis should show information about the container that we just ran. Similar to:\nCONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES\nb6f7790462c4 my_docker_conda \"tini -- /bin/bash -…\" 3 minutes ago Up 24 seconds 8888/tcp sad_maxwell\nIf we run docker run without any flags, your local terminal is attached to the container. This enables you to see the output of run_qc.sh, but also disables you from doing anything else in the meantime. We can start a container in detached mode with the -d flag. Try this out and run docker container ls to validate that the container is running.\nBy default, Docker keeps containers after they have exited. This can be convenient for debugging or if you want to look at logs, but it also consumes huge amounts of disk space. 
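To see what has accumulated, and to clean it up in one go, something like this sketch can be used (docker container prune will ask for confirmation before deleting anything):

```bash
# List only containers that have exited
docker container ls --all --filter status=exited

# Remove all stopped containers
docker container prune
```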
It’s therefore a good idea to always run with --rm, which will remove the container once it has exited.\nIf we want to enter a running container, there are two related commands we can use, docker attach and docker exec. docker attach will attach local standard input, output, and error streams to a running container. This can be useful if your terminal closed down for some reason or if you started a container in detached mode and changed your mind. docker exec can be used to execute any command in a running container. It’s typically used to peek in at what is happening by opening up a new shell. Here we start the container in detached mode and then start a new interactive shell so that we can see what happens. If you use ls inside the container you can see how the script generates files in the data and results directories. Note that you will be thrown out when the container exits, so you have to be quick.\ndocker run -d --rm --name my_container my_docker_conda\ndocker exec -it my_container /bin/bash\n\n4.1 Bind mounts\nThere are obviously some advantages to isolating and running your data analysis in containers, but at some point you need to be able to interact with the rest of the host system (e.g. your laptop) to actually deliver the results. This is done via bind mounts. When you use a bind mount, a file or directory on the host machine is mounted into a container. That way, when the container generates a file in such a directory it will appear in the mounted directory on your host system.\n\n\n\n\n\n\nTip\n\n\n\nDocker also has a more advanced way of data storage called volumes. Volumes provide added flexibility and are independent of the host machine’s file system having a specific directory structure available. They are particularly useful when you want to share data between containers.\n\n\nSay that we are interested in getting the resulting html reports from FastQC in our container. We can do this by mounting a directory called, say, fastqc_results in your current directory to the /course/results/fastqc directory in the container. Try this out by running:\ndocker run --rm -v $(pwd)/fastqc_results:/course/results/fastqc my_docker_conda\nHere the -v flag to docker run specifies the bind mount in the form of directory/on/your/computer:/directory/inside/container. $(pwd) simply evaluates to the working directory on your computer.\nOnce the container finishes, validate that it worked by opening one of the html reports under fastqc_results/.\nWe can also use bind mounts for getting files into the container rather than out. We’ve mainly been discussing Docker in the context of packaging an analysis pipeline to allow someone else to reproduce its outcome. Another application is as a kind of very powerful environment manager, similarly to how we’ve used Conda before. If you’ve organized your work into projects, then you can mount the whole project directory in a container and use the container as the terminal for running stuff while still using your normal OS for editing files and so on. Let’s try this out by mounting our current directory and starting an interactive terminal. 
Note that this will override the CMD command, so we won’t start the analysis automatically when we start the container.\ndocker run -it --rm -v $(pwd):/course/ my_docker_conda /bin/bash\nIf you run ls you will see that all the files in the container/ directory are there.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use docker run for starting a container and how the flags -d and --rm work.\nHow to use docker container ls for displaying information about the containers.\nHow to use docker attach and docker exec to interact with running containers.\nHow to use bind mounts to share data between the container and the host system." + }, + { + "objectID": "pages/containers.html#sharing-images", + "href": "pages/containers.html#sharing-images", + "title": "Controlling your environment with Containers", + "section": "5 Sharing images", + "text": "5 Sharing images\nThere would be little point in going through all the trouble of making your analyses reproducible if you can’t distribute them to others. Luckily, sharing Docker containers is extremely easy, and can be done in several ways. One of the more common ways to share Docker images is through container registries and repositories.\nFor example, a Docker registry is a service that stores Docker images, which could be hosted by a third party, publicly or privately. One of the most common registries is Docker Hub, which is a registry hosted by Docker itself. A repository, on the other hand, is a collection of container images with the same name but different tags (i.e. versions), for example ubuntu:latest or ubuntu:20.04. Repositories are stored in registries.\n\n\n\n\n\n\nNote\n\n\n\nRemember that we now have some clashing nomenclature between Git repositories (which we covered in the Git tutorial) and container repositories, so be aware of which one you’re talking about!\n\n\nThere are many registries out there, but here are some that might be of interest to you who are taking this course:\n\nDocker Hub\nGitHub Container Registry\nQuay\nBiocontainers\nRocker\nJupyter containers\n\nThe most common registry is probably Docker Hub, which lets you host unlimited public images and one private image for free (after which they charge a small fee). The GitHub Container Registry is also quite handy if you’re already using GitHub. Let’s see how it’s done using Docker Hub!\n\nRegister for an account on Docker Hub.\nUse docker login -u your_dockerhub_id to login to the Docker Hub registry. Or use the Sign in button in Docker Desktop.\nWhen you build an image, tag it with -t your_dockerhub_id/image_name, rather than just image_name.\nOnce the image has been built, upload it to Docker Hub with docker push your_dockerhub_id/image_name.\nIf another user runs docker run your_dockerhub_id/image_name the image will automatically be retrieved from Docker Hub. You can use docker pull for downloading without running.\n\nIf you want to refer to a Docker image in for example a publication, it’s very important that it’s the correct version of the image. This is handled via the ‘tags’ (e.g. docker build -t your_dockerhub_id/image_name:tag_name) that we introduced in Containers 2: The basics and used when building images in Containers 3: Building images.\n\n\n\n\n\n\nTip\n\n\n\nOn Docker Hub it is also possible to link to your Bitbucket or GitHub account and select repositories from which you want to automatically build and distribute Docker images. 
The Docker Hub servers will then build an image from the Dockerfile in your Git repository and make it available for download using docker pull. That way, you don’t have to bother manually building and pushing using docker push. The GitHub repository for this course is linked to Docker Hub and the Docker images are built automatically from Dockerfile and Dockerfile_slim, triggered by changes made to the GitHub repository. You can take a look at the course on Docker Hub here.\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow container registries and repositories work\nHow to use Docker Hub to share Docker images" + }, + { + "objectID": "pages/containers.html#packaging-the-case-study", + "href": "pages/containers.html#packaging-the-case-study", + "title": "Controlling your environment with Containers", + "section": "6 Packaging the case study", + "text": "6 Packaging the case study\nDuring these tutorials we have been working on a case study about the multi-resistant bacteria MRSA. Here we will build and run a Docker container that contains all the work we’ve done so far.\n\nWe’ve set up a GitHub repository for version control and for hosting our project.\nWe’ve defined a Conda environment that specifies the packages we’re depending on in the project.\nWe’ve constructed a Snakemake workflow that performs the data analysis and keeps track of files and parameters.\nWe’ve written a Quarto document that takes the results from the Snakemake workflow and summarizes them in a report.\n\nThe workshop-reproducible-research/tutorials/containers directory contains the final versions of all the files we’ve generated in the other tutorials: environment.yml, Snakefile, config.yml and code/supplementary_material.qmd. The only difference compared to the other tutorials is that we have also included the rendering of the Supplementary Material HTML file into the Snakemake workflow as the rule make_supplementary. Running all of these steps will take some time to execute (around 20 minutes or so), in particular if you’re on a slow internet connection.\nNow take a look at Dockerfile. Everything should look quite familiar to you, since it’s basically the same steps as in the image we constructed in the Building images section, although with some small modifications. The main difference is that we add the project files needed for executing the workflow (mentioned in the previous paragraph), and install the conda packages using environment.yml. If you look at the CMD command you can see that it will run the whole Snakemake workflow by default.\nNow run docker build as before, tag the image with my_docker_project (remember the --platform linux/x86_64 flag if you’re on a new Mac with the Apple chip):\ndocker build -t my_docker_project -f Dockerfile .\nGo get a coffee while the image builds (or you could use docker pull nbisweden/workshop-reproducible-research which will download the same image).\nValidate with docker image ls. Now all that remains is to run the whole thing with docker run. We just want to get the results, so mount the directory /course/results/ to, say, results/ in your current directory. Click below to see how to write the command.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nIf building your own image:\ndocker run -v $(pwd)/results:/course/results my_docker_project\nIf you pulled the image from DockerHub:\ndocker run -v $(pwd)/results:/course/results nbisweden/workshop-reproducible-research\n\n\n\nWell done! 
You now have an image that allows anyone to exactly reproduce your analysis workflow (if you first docker push to Dockerhub that is).\n\n\n\n\n\n\nTip\n\n\n\nIf you’ve done the Jupyter tutorial, you know that Jupyter Notebook runs as a web server. This makes it very well suited for running in a Docker container, since we can just expose the port Jupyter Notebook uses and redirect it to one of our own. You can then work with the notebooks in your browser just as you’ve done before, while it’s actually running in the container. This means you could package your data, scripts and environment in a Docker image that also runs a Jupyter Notebook server. If you make this image available, say on Dockerhub, other researchers could then download it and interact with your data/code via the fancy interactive Jupyter notebooks that you have prepared for them. We haven’t made any fancy notebooks for you, but we have set up a Jupyter Notebook server. Try it out if you want to (replace the image name with your version if you’ve built it yourself):\ndocker run -it nbisweden/workshop-reproducible-research jupyter notebook -allow-root --no-browser" + }, + { + "objectID": "pages/containers.html#apptainer", + "href": "pages/containers.html#apptainer", + "title": "Controlling your environment with Containers", + "section": "7 Apptainer", + "text": "7 Apptainer\nApptainer is a container software alternative to Docker. It was originally developed as Singularity by researchers at Lawrence Berkeley National Laboratory (read more about this below) with focus on security, scientific software, and HPC clusters. One of the ways in which Apptainer is more suitable for HPC is that it very actively restricts permissions so that you do not gain access to additional resources while inside the container. Apptainer also, unlike Docker, stores images as single files using the Singularity Image Format (SIF). A SIF file is self-contained and can be moved around and shared like any other file, which also makes it easy to work with on an HPC cluster.\n\n\n\n\n\n\nApptainer and Singularity\n\n\n\nThe open source Singularity project was renamed to Apptainer in 2021. The company Sylabs still keeps their commercial branch of the project under the Singularity name, and offer a free ‘Community Edition’ version. The name change was done in order to clarify the distinction between the open source project and the various commercial versions. At the moment there is virtually no difference to you as a user whether you use Singularity or Apptainer, but eventually it’s very likely that the two will diverge.\n\n\nWhile it is possible to define and build Apptainer images from scratch, in a manner similar to what you’ve already learned for Docker, this is not something we will cover here (but feel free to read more about this in e.g. the Apptainer docs.\nThe reasons for not covering Apptainer more in-depth are varied, but it basically boils down to it being more or less Linux-only, unless you use Virtual Machines (VMs). Even with this you’ll run into issues of incompatibility of various kinds, and these issues are further compounded if you’re on one of the new ARM64-Macs. You also need root (admin) access in order to actually build Apptainer images regardless of platform, meaning that you can’t build them on e.g. Uppmax, even though Apptainer is already installed there. You can, however, use the --remote flag, which runs the build on Apptainer’s own servers. 
This doesn’t work in practice a lot of the time, though, since most scientists work in private Git repositories so that their research and code are not publicly available, and the --remote flag requires that e.g. the environment.yml file is publicly available.\nThere are, however, very good reasons to use Apptainer, the major one being that you aren’t allowed to use Docker on most HPC systems! One of the nicer features of Apptainer is that it can convert Docker images directly for use within Apptainer, which is highly useful for the cases when you have already built your Docker image or if you’re using a remotely available image stored on e.g. DockerHub. For a lot of scientific work based in R and/or Python, however, it is most often the case that you build your own images, since you have a complex dependency tree of software packages not readily available in existing images. So, we now have another problem for building our own images:\n\nOnly Apptainer is allowed on HPC systems, but you can’t build images there due to not having root access.\nYou can build Apptainer images locally and transfer them to HPCs, but this is problematic unless you’re running Linux natively.\n\nSeems like a “catch 22”-problem, right? There are certainly workarounds (some of which we have already mentioned) but most are roundabout or difficult to get working for all use-cases. Funnily enough, there’s a simple solution: run Apptainer locally from inside a Docker container! Conceptually very meta, yes, but it works very well in practice. What we are basically advocating for is that you stick with Docker for most of your container-based work, but convert your Docker images using Apptainer-in-Docker whenever you need to work on an HPC. This is of course not applicable to Linux users or those of you who are fine with working through VMs and managing any issues that arise from doing that.\n\n\n\n\n\n\nSummary\n\n\n\nApptainer is a great piece of software that is easiest to use if you’re working on a Linux environment. Docker is, however, easier to use from a cross-platform standpoint and covers all use-cases except running on HPCs. Running on HPCs can be done by converting existing Docker images at runtime, while building images for use on HPCs can be done using local Docker images and Apptainer-in-Docker.\n\n\n\n7.1 Apptainer-in-Docker\nBy creating a bare-bones, Linux-based Docker image with Apptainer you can build Apptainer images locally on non-Linux operating systems. There is already a good image set up for just this, and it is defined in this GitHub repository. Looking at the instructions there we can see that we need to do the following:\ndocker run \\\n --rm \\\n -v /var/run/docker.sock:/var/run/docker.sock \\\n -v $(pwd):/work \\\n kaczmarj/apptainer \\\n build <IMAGE>.sif docker-daemon://<IMAGE>:<TAG>\nYou already know about docker run, the --rm flag and bind mounts using -v. The /var/run/docker.sock part is the Unix socket that the Docker daemon listens to by default, meaning that it is needed for us to be able to specify the location of the Docker container we want to convert to a SIF file. The kaczmarj/apptainer part after the bind mounts is the image location hosted at DockerHub, while the last line is the Apptainer command that actually does the conversion. All we need to do is to replace the <IMAGE> part with the Docker image we want to convert, e.g. 
my_docker_image.\n\nReplace <IMAGE> and <TAG> with one of your locally available Docker images and one of its tags and run the command - remember that you can use docker image ls to check what images you have available.\n\nIn the end you’ll have a SIF file (e.g. my_docker_image.sif) that you can transfer to an HPC such as Uppmax and run whatever analyses you need. If you want to be able to do this without having to remember all the code you can check out the this script.\n\n\n7.2 Running Apptainer\nThe following exercises assume that you have a login to the Uppmax HPC cluster in Uppsala, but will also work for any other system that has Apptainer installed - like if you managed to install Apptainer on your local system or have access to some other HPC cluster. Let’s try to convert the Docker image for this course directly from DockerHub:\napptainer pull mrsa_proj.sif docker://nbisweden/workshop-reproducible-research\nThis should result in a SIF file called mrsa_proj.sif.\nIn the Docker image we included the code needed for the workflow in the /course directory of the image. These files are of course also available in the Apptainer image. However, a Apptainer image is read-only. This will be a problem if we try to run the workflow within the /course directory, since the workflow will produce files and Snakemake will create a .snakemake directory. Instead, we need to provide the files externally from our host system and simply use the Apptainer image as the environment to execute the workflow in (i.e. all the software and dependencies).\nIn your current working directory (workshop-reproducible-research/tutorials/containers/) the vital MRSA project files are already available (Snakefile, config.yml and code/supplementary_material.qmd). Since Apptainer bind mounts the current working directory we can simply execute the workflow and generate the output files using:\napptainer run mrsa_proj.sif\nThis executes the default run command, which is snakemake -rp -c 1 --configfile config.yml (as defined in the original Dockerfile). Once completed you should see a bunch of directories and files generated in your current working directory, including the results/ directory containing the final HTML report.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to build a Apptainer image using Apptainer inside Docker.\nHow to convert Docker images to Apptainer images.\nHow to run Apptainer images." + }, + { + "objectID": "pages/containers.html#extra-material", + "href": "pages/containers.html#extra-material", + "title": "Controlling your environment with Containers", + "section": "8 Extra material", + "text": "8 Extra material\nContainers can be large and complicated, but once you start using them regularly you’ll find that you start understand these complexities. There are lots of different things you can do with images and containers in general, especially when it comes to optimising build time or final image size. Here is some small tips and tricks that you can be inspired from!\nIf you want to read more about containers in general you can check out these resources:\n\nA “Get started with Docker” at the Docker website.\nAn early paper on the subject of using Docker for reproducible research.\n\n\n8.1 Building for multiple platforms\nWith the newer ARM64 architectures introduced by Apple one often runs into the problem of not having an architecture-native image to run with. 
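If you are unsure which platform a local image was actually built for, docker image inspect can tell you. A sketch, using the Ubuntu image from earlier as an example:

```bash
# Print the OS and CPU architecture an image was built for, e.g. linux/amd64 or linux/arm64
docker image inspect --format '{{.Os}}/{{.Architecture}}' ubuntu:latest
```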
This is sometimes okay since the Rosetta2 software can emulate the old AMD64 architecture on newer ARM64 computers, but results in a performance hit. One could just build for ARM64 using --platform=linux/arm64 instead, but then somebody who doesn’t have the new architecture can’t run it. There is a way around this, however: multi-platform builds. We can build for multiple platforms at the same time and push those to e.g. DockerHub and anybody using those images will automatically pull the one appropriate for their computer. Here’s how to do it:\n\nStart by checking the available builders using docker buildx ls.\n\nYou should only see the default builder, which does not have access to multi-platform builds. Let’s create a new builder that does have access to it:\n\nRun the following: docker buildx create --name mybuilder --driver docker-container --bootstrap.\nSwitch to using the new builder with docker buildx use mybuilder and check that it worked with docker buildx ls.\n\nAll that’s needed now is to build and push the images! The following command assumes that you have an account with <username> at DockerHub and you’re pushing the <image> image:\ndocker buildx build --platform linux/amd64,linux/arm64 -t <username>/<image>:latest --push .\n\nExecute the above command with your username and your image.\n\nThat’s it! Now anybody who does e.g. docker pull <username>/<image> will get an image appropriate for their architecture whether they are on AMD64 or ARM64!\n\n\n\n\n\n\nAn alias to buildx\n\n\n\nYou can type docker buildx install to make the docker build into an alias for docker buildx, allowing you to run multi-platform builds using docker build. Use docker buildx uninstall to remove this alias." + }, { "objectID": "lectures/snakemake/snakemake.html#why-do-we-need-workflow-managers", "href": "lectures/snakemake/snakemake.html#why-do-we-need-workflow-managers", @@ -606,14 +725,14 @@ "href": "index.html", "title": "", "section": "", - "text": "How to work reproducibly with control and structuring of project code, environment and workflow management\n\n\nUpdated: 10-10-2024 at 09:14:09 ." + "text": "How to work reproducibly with control and structuring of project code, environment and workflow management\n\n\nUpdated: 15-10-2024 at 16:39:31 ." }, { "objectID": "index.html#tools-for-reproducible-research", "href": "index.html#tools-for-reproducible-research", "title": "", "section": "", - "text": "How to work reproducibly with control and structuring of project code, environment and workflow management\n\n\nUpdated: 10-10-2024 at 09:14:09 ." + "text": "How to work reproducibly with control and structuring of project code, environment and workflow management\n\n\nUpdated: 15-10-2024 at 16:39:31 ." }, { "objectID": "home_schedule.html", @@ -1105,6 +1224,41 @@ "section": "Quarto vs. R Markdown", "text": "Quarto vs. R Markdown\n\n\nQuarto is a command line tool\nQuarto \\(\\thickapprox\\) R Markdown 2.0\nQuarto is language-agnostic (does not depend on R)\nQuarto has all functionality built-in (you don’t need to install another package to create e.g. presentations)\nThe Quarto format is similar to R Markdown\nQuarto can render R Markdown documents\nR Markdown will continue to be supported, but Quarto is the focus of new functionality and major development" }, + { + "objectID": "pages/conda.html", + "href": "pages/conda.html", + "title": "Controlling your environment with Conda", + "section": "", + "text": "Conda is a package and environment manager. 
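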
As a package manager it enables you to install a wide range of software and tools using one simple command: conda install. As an environment manager it allows you to create and manage multiple different environments, each with their own set of packages.\nWhat are the benefits of using an environment manager? Some examples include the ability to easily run different versions of the same package, have different cross-package dependencies that are otherwise incompatible with each other and, last but not least, easy installation of all the software needed for an analysis.\nEnvironments are of particular relevance when making bioinformatics projects reproducible. Full reproducibility requires the ability to recreate the system that was originally used to generate the results. This can, to a large extent, be accomplished by using Conda to make a project environment with specific versions of the packages that are needed in the project. You can read more about Conda here.\nA Conda package is a compressed tarball (system-level libraries, Python or other modules, executable programs or other components). Conda keeps track of the dependencies between packages and platforms - this means that when installing a given package, all necessary dependencies will also be installed.\nConda packages are typically hosted and downloaded from remote so-called channels. Some widely used channels for general-purpose and bioinformatics packages are conda-forge and Bioconda, respectively. Both of these are community-driven projects, so if you’re missing some package you can contribute to the channel by adding the package to it. When installing a Conda package you specify the package name, version (optional) and channel to download from.\nA Conda environment is essentially a directory that is added to your PATH and that contains a specific collection of packages that you have installed. Packages are symlinked between environments to avoid unnecessary duplication.\n\nDifferent Conda flavours You may come across several flavours of Conda. There’s Miniconda, which is the installer for Conda. The second is Anaconda, which is a distribution of not only Conda, but also over 150 scientific Python packages curated by the company by the same name (Anaconda). It’s generally better to stick with the Miniconda installation rather than installing 3 GB worth of packages you may not even use. Then, lastly, there’s the Miniforge flavour that we’re using here, which is a community-driven version of Conda that’s highly popular within the scientific community.\nThe difference between Miniconda and Miniforge is that the former points to points to the default channel by default (which requires an Anaconda license for commercial purposes), while the latter points to the community-maintained conda-forge channel by default. While Conda is created and owned by Anaconda the company, Conda itself is open source - it’s the default channel that is proprietary. The conda-forge and bioconda channels (two of the largest channels outside of default) are community-driven. Confusing? Yes. If you want this information more in-depth you can read this blog post by Anaconda." + }, + { + "objectID": "pages/conda.html#introduction", + "href": "pages/conda.html#introduction", + "title": "Controlling your environment with Conda", + "section": "", + "text": "Conda is a package and environment manager. As a package manager it enables you to install a wide range of software and tools using one simple command: conda install. 
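For example, installing a pinned version of a tool from a specific channel might look like the following sketch (FastQC and the Bioconda channel are the same examples used later in this tutorial):

```bash
# Install a specific version of FastQC from the bioconda channel
conda install -c bioconda fastqc=0.12.1
```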
As an environment manager it allows you to create and manage multiple different environments, each with their own set of packages.\nWhat are the benefits of using an environment manager? Some examples include the ability to easily run different versions of the same package, have different cross-package dependencies that are otherwise incompatible with each other and, last but not least, easy installation of all the software needed for an analysis.\nEnvironments are of particular relevance when making bioinformatics projects reproducible. Full reproducibility requires the ability to recreate the system that was originally used to generate the results. This can, to a large extent, be accomplished by using Conda to make a project environment with specific versions of the packages that are needed in the project. You can read more about Conda here.\nA Conda package is a compressed tarball (system-level libraries, Python or other modules, executable programs or other components). Conda keeps track of the dependencies between packages and platforms - this means that when installing a given package, all necessary dependencies will also be installed.\nConda packages are typically hosted and downloaded from remote so-called channels. Some widely used channels for general-purpose and bioinformatics packages are conda-forge and Bioconda, respectively. Both of these are community-driven projects, so if you’re missing some package you can contribute to the channel by adding the package to it. When installing a Conda package you specify the package name, version (optional) and channel to download from.\nA Conda environment is essentially a directory that is added to your PATH and that contains a specific collection of packages that you have installed. Packages are symlinked between environments to avoid unnecessary duplication.\n\nDifferent Conda flavours You may come across several flavours of Conda. There’s Miniconda, which is the installer for Conda. The second is Anaconda, which is a distribution of not only Conda, but also over 150 scientific Python packages curated by the company by the same name (Anaconda). It’s generally better to stick with the Miniconda installation rather than installing 3 GB worth of packages you may not even use. Then, lastly, there’s the Miniforge flavour that we’re using here, which is a community-driven version of Conda that’s highly popular within the scientific community.\nThe difference between Miniconda and Miniforge is that the former points to points to the default channel by default (which requires an Anaconda license for commercial purposes), while the latter points to the community-maintained conda-forge channel by default. While Conda is created and owned by Anaconda the company, Conda itself is open source - it’s the default channel that is proprietary. The conda-forge and bioconda channels (two of the largest channels outside of default) are community-driven. Confusing? Yes. If you want this information more in-depth you can read this blog post by Anaconda." + }, + { + "objectID": "pages/conda.html#the-basics", + "href": "pages/conda.html#the-basics", + "title": "Controlling your environment with Conda", + "section": "2 The basics", + "text": "2 The basics\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if, you haven’t done so already. Then open up a terminal and go to workshop-reproducible-research/tutorials/conda. 
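Before starting, it can be worth verifying that your Conda installation is picked up by the terminal at all. A quick sketch:

```bash
# Print the installed Conda version
conda --version

# Show general information about the installation, including the configured channels
conda info
```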
Instructions below assume that you are standing in workshop-reproducible-research/tutorials/conda/ unless otherwise specified (e.g. if it says “create a file”, it means save it in workshop-reproducible-research/tutorials/conda/).\nLet’s assume that you are just about to start a new exciting research project called Project A.\n\n2.1 Creating Conda environments\n\nLet’s make our first Conda environment:\n\nconda create -n project_a -c bioconda fastqc\nThis will create an environment called project_a, containing FastQC from the Bioconda channel. Conda will list the packages that will be installed and ask for your confirmation.\n\nOnce it is done, you can activate the environment:\n\nconda activate project_a\nBy default, Conda will add information to your prompt telling you which environment is active.\n\nTo see all your environments you can run:\n\nconda info --envs\nThe active environment will be marked with an asterisk.\n\nTo see the installed packages and their versions in the active environment, run:\n\nconda list\n\nTo save the installed packages to a file, run:\n\nconda env export --from-history > environment.yml\nWhere --from-history only reports the packages requested to be installed and not additional dependencies. A caveat is that if no version was originally specified, then it is not included in the export file either.\n\nNow, deactivate the environment by running conda deactivate.\nList all environments again. Which environment is now marked as active?\nTry to run FastQC:\n\nfastqc --version\n\nDid it work? Activate your project_a environment and run the fastqc --version command again. Does it work now?\n\nHopefully the FastQC software was not found in your base environment (unless you had installed it previously), but worked once your environment was activated.\n\n\n2.2 Adding more packages\n\nNow, let’s add another package (MultiQC) to our environment using conda install. Make sure that project_a is the active environment first.\n\nconda install -c bioconda multiqc\n\nIf we don’t specify the package version, the latest available version will be installed. What version of MultiQC got installed?\nRun the following to see what versions are available:\n\nconda search -c bioconda multiqc\n\nNow try to install a different version of MultiQC, e.g.:\n\nconda install -c bioconda multiqc=1.13\nRead the information that Conda displays in the terminal. It probably asks if you want to downgrade the initial MultiQC installation to the one specified here (1.13 in the example). You can only have one version of a given package in a given environment.\nLet’s assume that you will have sequencing data in your Project A, and want to use the latest BBMap software to align your reads.\n\nFind out what versions of BBMap are available in the Bioconda channel using conda search -c bioconda bbmap.\nNow install the latest available version of BBMap in your project_a environment.\n\nLet’s further assume that you have an old project (called Project Old) where you know you used BBMap 37.10. You just got back reviewer comments and they want you to include some alignment statistics. Unfortunately, you haven’t saved that information so you will have to rerun the alignment. Now, it is essential that you use the same version of BBMap that your results are based on, otherwise the alignment statistics will be misleading. Using Conda environments this becomes simple. 
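As a small aside (not part of the original exercise), Conda also accepts version constraints rather than exact pins, which can be handy when any sufficiently recent version will do; the MultiQC versions below are just reused from the example above for illustration:
conda search -c bioconda 'multiqc>=1.13'
conda install -c bioconda 'multiqc>=1.13,<1.15'
Quoting the version spec stops your shell from treating the > and < characters as redirections. Back to the old-project scenario: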
You can just have a separate environment for your old project where you have an old version of BBMap without interfering with your new Project A where you want the latest version.\n\nMake a new environment for your old project:\n\nconda create -n project_old -c bioconda bbmap=37.10\n\nList your environments (do you remember the command?).\nActivate project_old and check the BBMap version (bbmap.sh --version).\nActivate project_a again and check the BBMap version.\n\n\n\n2.3 Removing packages\nNow let’s try to remove an installed package from the active environment:\nconda remove multiqc\n\nRun conda deactivate to exit your active environment.\nNow, let’s remove an environment:\n\nconda env remove -n project_old\nAfter making a few different environments and installing a bunch of packages, Conda can take up some disk space. You can remove unnecessary files with the command:\nconda clean -a\nThis will remove package tar-balls that are left from package installations, unused packages (i.e. those not present in any environments), and cached data.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use conda install for installing packages on the fly.\nHow to create, activate and change between environments.\nHow to remove packages or environments and clean up." + }, + { + "objectID": "pages/conda.html#working-with-environments", + "href": "pages/conda.html#working-with-environments", + "title": "Controlling your environment with Conda", + "section": "3 Working with environments", + "text": "3 Working with environments\nWe have up until now specified which Conda packages to install directly on the command line using the conda create and conda install commands. For working in projects this is not the recommended way. Instead, for increased control and reproducibility, it is better to use an environment file (in YAML format) that specifies the packages, versions and channels needed to create the environment for a project.\nThroughout these tutorials we will use a case study where we analyse an RNA-seq experiment with the multi-resistant bacteria MRSA (see intro). You will now start to make a Conda YAML file for this MRSA project. The file will contain a list of the software and versions needed to execute the analysis code.\nIn this Conda tutorial, all code for the analysis is available in the script code/run_qc.sh. This code will download the raw FASTQ-files and subsequently run quality control on these using the FastQC software.\n\n3.1 Working with environments\nWe will start by making a Conda YAML-file that contains the required packages to perform these two steps. Later in the course, you will update the Conda YAML-file with more packages, as the analysis workflow is expanded.\n\nLet’s get going! Make a YAML file called environment.yml looking like this, and save it in the current directory (which should be workshop-reproducible-research/tutorials/conda):\n\nchannels:\n - conda-forge\n - bioconda\ndependencies:\n - fastqc=0.12.1\n\nNow, make a new Conda environment from the YAML file (note that here the command is conda env create as opposed to conda create that we used before):\n\nconda env create -n project_mrsa -f environment.yml\n\n\n\n\n\n\nTip\n\n\n\nYou can also specify exactly which channel a package should come from inside the environment file, using the channel::package=version syntax.\n\n\n\n\n\n\n\n\nTip\n\n\n\nInstead of the -n flag you can use the -p flag to set the full path to where the Conda environment should be installed. 
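A minimal sketch of what that could look like, assuming an example path of envs/project_mrsa inside the current directory (any path works):
conda env create -p ./envs/project_mrsa -f environment.yml
conda activate ./envs/project_mrsa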
In that way you can contain the Conda environment inside the project directory, which does make sense from a reproducibility perspective, and makes it easier to keep track of what environment belongs to what project. If you don’t specify -p the environment will be installed in the envs/ directory inside your Conda installation path.\n\n\n\nActivate the environment!\nNow we can run the code for the MRSA project found in code/run_qc.sh, either by running bash code/run_qc.sh or by opening the run_qc.sh file and executing each line in the terminal one by one. Do this!\n\nThis should download the project FASTQ files and run FastQC on them (as mentioned above).\n\nCheck your directory contents (ls -Rlh, or in your file browser). It should now have the following structure:\n\n conda/\n |\n |- code/\n | |- run_qc.sh\n |\n |- data/\n | |- SRR935090.fastq.gz\n | |- SRR935091.fastq.gz\n | |- SRR935092.fastq.gz\n |\n |- results/\n | |- fastqc/\n | |- SRR935090_fastqc.html\n | |- SRR935090_fastqc.zip\n | |- SRR935091_fastqc.html\n | |- SRR935091_fastqc.zip\n | |- SRR935092_fastqc.html\n | |- SRR935092_fastqc.zip\n |\n |- environment.yml\nNote that all that was needed to carry out the analysis and generate these files and results was environment.yml (that we used to create a Conda environment with the required packages) and the analysis code in code/run_qc.sh.\n\n\n3.2 Keeping track of dependencies\nProjects can often be quite large and require lots of dependencies; it can feel daunting to try to capture all of that in a single Conda environment, especially when you consider potential incompatibilities that may arise. It can therefore be a good idea to start new projects with an environment file with each package you know that you will need to use, but without specifying exact versions (except for those packages where you know you need a specific version). This will install the latest compatible versions of all the specified software, making the start-up and installation part of new projects easier. You can then add the versions that were installed to your environment file afterwards, ensuring future reproducibility.\nThere is one command that can make this easier: conda env export. This allows you to export a list of the packages you’ve already installed, including their specific versions, meaning you can easily add them after the fact to your environment file. If you use the --no-builds flag, you’ll get a list of the packages minus their OS-specific build specifications, which is more useful for making the environment portable across systems. This way, you can start with an environment file with just the packages you need (without version), which will install the most up-to-date version possible, and then add the resulting version back in to the environment file using the export command!\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to define our Conda environment using a YAML-file.\nHow to use conda env create to make a new environment from a YAML-file.\nHow to use conda env export to get a list of installed packages.\nHow to work in a project-like setting." + }, + { + "objectID": "pages/conda.html#extra-material", + "href": "pages/conda.html#extra-material", + "title": "Controlling your environment with Conda", + "section": "4 Extra material", + "text": "4 Extra material\nThe following extra material contains some more advanced things you can do with Conda and the command line in general, which is not part of the main course materials. 
All the essential skills are covered by the previous sections: the material here should be considered tips and tricks from people who use Conda as part of their daily work. You thus don’t need to use these things unless you want to, and you can even skip this part of the lesson if you like!\n\n4.1 Configuration\nThe behaviour of your Conda installation can be changed using an optional configuration file .condarc. On a fresh Conda install no such file is included but it’s created in your home directory as ~/.condarc the first time you run conda config.\nYou can edit the .condarc file either using a text editor or by way of the conda config command. To list all config parameters and their settings run:\nconda config --show\nSimilar to Conda environment files, the configuration file is in YAML syntax. This means that the config file is structured in the form of key:value pairs where the key is the name of the config parameter (e.g. auto_update_conda) and the value is the parameter setting (e.g. True).\nAdding the name of a config parameter to conda config --show will show only that parameter, e.g. conda config --show channels.\nYou can change parameters with the --set, --add, --append and --remove flags to conda config.\nIf you for example want to enable the ‘Always yes’ behaviour which makes Conda automatically choose the yes option, such as when installing, you can run:\nconda config --set always_yes True\nTo see details about a config parameter you can run conda config --describe parameter. Try running it on the channels parameter:\nconda config --describe channels\nIn the beginning of this tutorial we added Conda channels to the .condarc file using conda config --add channels. To remove one of the channels from the configuration file you can run:\nconda config --remove channels conda-forge\nCheck your .condarc file to see the change. To add the conda-forge channel back to the top of the channels simply run:\nconda config --add channels conda-forge\nTo completely remove a parameter and all its values run:\nconda config --remove-key parameter\nFor a list of Conda configuration parameters see the Conda configuration page.\n\n\n4.2 Managing Python versions\nWith Conda environments it’s possible to keep several different versions of Python on your computer at the same time, and switching between these versions is very easy. However, a single Conda environment can only contain one version of Python.\n\n4.2.1 Your current Python installation\nThe base environment has its own version of Python installed. When you open a terminal (after having installed Conda on your system) this base environment is activated by default (as evidenced by (base) prepended to your prompt). You can check what Python version is installed in this environment by running python --version. To see the exact path to the Python executable type which python.\nIn addition to this your computer may already have Python installed in a separate (system-wide) location outside of the Conda installation. To see if that is the case type conda deactivate until your prompt is not prepended with a Conda environment name. Then type which python. If a path was printed to the terminal (e.g. /usr/bin/python) that means some Python version is already installed in that location. Check what version it is by typing python --version.\nNow activate the base environment again by typing conda activate (or the equivalent conda activate base) then check the Python installation path and version using which and python --version as above. 
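A minimal sketch of that comparison (the example paths below are made up and will differ on your system):
conda deactivate              # leave all Conda environments
which python                  # might print e.g. /usr/bin/python, or nothing at all
conda activate base           # back to the base environment
which python                  # now points into the Conda installation, e.g. ~/miniforge3/bin/python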
See the difference? When you activate an environment your $PATH variable is updated so that when you call python (or any other program) the system first searches the directory of the currently active environment.\n\n\n4.2.2 Different Python versions\nWhen you create a new Conda environment you can choose to install a specific version of Python in that environment as well. As an example, create an environment containing Python version 3.5 by running:\nconda create -n py35 python=3.5\nHere we name the environment py35 but you can choose whatever name you want.\nTo activate the environment run:\nconda activate py35\nYou now have a completely separate environment with its own Python version.\nLet’s say you instead want an environment with Python version 2.7 installed. You may for instance want to run scripts or packages that were written for Python 2.x and are thus incompatible with Python 3.x. Simply create the new Conda environment with:\nconda create -n py27 python=2.7\nActivate this environment with:\nconda activate py27\nNow, switching between Python versions is as easy as typing conda activate py35 / conda activate py27.\n\n\n\n\n\n\nNote\n\n\n\nIf you create an environment where none of the packages require Python, and you don’t explicitly install the python package then that new environment will use the Python version installed in your base environment.\n\n\n\n\n\n4.3 Decorating your prompt\nBy default, the name of the currently activated environment is added to your command line prompt. This is a good thing, as it makes it easier to keep track of what environment and packages you have access to. The way this is done in the default implementation becomes an issue when using absolute paths for environments (specifying conda env create -p path/to/environment), though, as the entire path will be added to the prompt. This can take up a lot of unnecessary space on your screen, but can be solved in a number of ways.\nThe most straightforward way to solve this is to change the Conda configuration file, specifically the settings of the env_prompt configuration value which determines how Conda modifies your command line prompt. For more information about this setting you can run conda config --describe env_prompt and to see your current setting you can run conda config --show env_prompt.\nBy default env_prompt is set to ({default_env}) which modifies your prompt with the active environment name if it was installed using the -n flag or if the environment folder has a parent folder named envs/. Otherwise the full environment path (i.e. the ‘prefix’) is displayed.\nIf you instead set env_prompt to ({name}) Conda will modify your prompt with the folder name of the active environment. You can change the setting by running conda config --set env_prompt '({name}) '\nIf you wish to keep the ({default_env}) behaviour, or just don’t want to change your Conda config, an alternative is to keep Conda environment folders within a parent folder called envs/. This will make Conda only add the folder name of the Conda environment to your prompt when you activate it.\nAs an example, say you have a project called project_a with the project path ~/myprojects/project_a. You could then install the environment for project_a into a folder ~/myprojects/project_a/envs/project_a_environment. Activating the environment by pointing Conda to it (e.g. 
conda activate ~/myprojects/project_a/envs/project_a_environment) will only cause your prompt to be modified with project_a_environment.\n\n\n4.4 Bash aliases for conda\nSome programmers like to have aliases (i.e. shortcuts) for common commands. Two aliases that might be useful for you are alias coac='conda activate' and alias code='conda deactivate'. Don’t forget to add them to your ~/.bash_profile if you want to use them!\n\n\n4.5 Rolling back to an earlier version of the environment\nThe history of the changes to an environment are automatically tracked. You can see revisions to an environment by using:\nconda list --revisions\nWhich shows each revision (numbered) and what’s installed.\nYou can revert back to particular revision using:\nconda install --revision 5\n\n\n4.6 Mamba, the drop-in Conda replacement\nThere is another piece of software that is built on top of Conda as a drop-in replacement for it: Mamba. The reason for Mamba’s existence is that it used to have a better solver algorithm for the dependency tree than Conda did. These days, however, this algorithm is included in Conda as the default. There is still some minor reasons you might want to use Mamba, however, the first of which being that Mamba re-implements Conda in C++, which runs slightly faster than the Python-based Conda. This only yields a minor speed increase compared to the dependency-tree algorithm, though, so don’t expect major differences in execution time between Conda and Mamba. Another reason is that Mamba colours its output, which is nice if you care about that sort of thing. If you installed Conda as described in the pre-course material you’ll, conveniently, already have installed Mamba as well!" + }, { "objectID": "pages/git.html", "href": "pages/git.html", @@ -1176,10 +1330,283 @@ "text": "9 Extra material\nThe following extra material contains some more advanced things you can do with Git and the command line in general, which is not part of the main course materials. All the essential skills of Git are covered by the previous sections; the material here should be considered tips and tricks from people who use Git every day. You thus don’t need to use these things unless you want to, and you can even skip this part of the lesson if you like!\nIf you are interested in learning more about Git in general, here are some reading tips for you:\n\nGit cheat-sheet\nA simple Git guide\nResources to learn Git\nGit reference manual\n\n\n9.1 Forking\nWhen you want to work on an Open Source project that is available on e.g. GitHub, you usually don’t have permission to directly push code to the project’s repository - this is so that the project’s maintainers are the only ones that can directly change anything in their codebase. How do you then contribute to projects that don’t allow you to push your code to their repository? Simple: use forking!\nForking is when you make your own copy of a repository on your GitHub account, which you will then have permissions to change as you see fit. You can then create pull requests from your fork to the original repository, rather than pushing code to a new branch and making a pull request from that. 
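On the command line, a fork-based contribution might look something like the following sketch (the user and repository names are made up, and the fork itself is created beforehand via the GitHub web interface):
git clone git@github.com:your-username/some-project.git    # clone your fork, not the original repository
cd some-project
git switch --create my-contribution
# ...edit files, then git add and git commit as usual...
git push --set-upstream origin my-contribution
The pull request from your fork to the original repository is then opened in the GitHub web interface.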
Working with forks just adds an additional step to the whole workflow: instead of being “clone; code and commit changes on a new branch; push branch to remote; pull request from branch” it becomes “fork; clone; code and commit changes; push code to fork; pull request from fork”.\nYou might also want to do a fork of a project simply because you want to have your own copy of it as well, without ever having the intention of changing it. This is, of course, perfectly fine as well, but do keep in mind that developers are usually quite happy to incorporate new changes from contributors if they are reasonable and fulfil a purpose and add functionality to the project. It is quite common that you have a use-case the maintainer didn’t think of before, and that you’ve helped the project grow by contributing your code!\n\n\n9.2 Amending commits\nOnce in a while you’ll have just committed something to your Git repo and immediately remembered that you forgot to add something small, or perhaps you saw an error somewhere. While you can certainly just add that and make a new commit, wouldn’t it be nicer if you could just make the change as if it was already a part of the first commit? Well, you can! Just make the change, stage it and then commit together with the --amend flag, like so:\ngit add <file>\ngit commit --amend\nThis will add the staged changes to the previous commit as if they had always been there. Be careful, though! This will actually rewrite history, meaning that it only works if you only amended local changes. If you had already pushed the first commit to a remote repository you would run into trouble: you will be able to make the amend without issue, but you’ll get an error when you try to push your new changes, since the remote already contains the first version of the commit and can’t simply rewrite what it already has.\nAmending changes is thus a good way to fix small mistakes you realise you made just after committing them, as long as you only amend local changes!\n\n\n9.3 Rebasing\nThe git rebase command is an alternative to git merge in that it solves the same problem: getting changes in one branch into another branch. We’ve already gone through merging extensively, so how is rebasing different? Let’s look at a common case: a feature-branch which we want to get into the main branch.\n\nRecall that a merge creates a merge commit, something akin to Merge branch 'feature-branch' into main or similar. This is a new commit that didn’t exist before, which brings the changes on feature-branch into main, but it contains no actual work itself. This is both a good and a bad thing: good, because merging is a safe, non-destructive operation (it doesn’t alter history); bad, because it can make the history itself look quite messy. These are the commands used and what the history will look like afterwards:\ngit switch main\ngit merge feature-branch\n\n(The commit with the dashed border is the merge commit.)\nRebasing, on the other hand, does not create merge commits. Indeed, what rebase does is to “re-base” one branch on the other, i.e. pretend that new changes were done on a different base than what actually happened (hence the name). 
Getting our feature-branch onto main using rebase actually entails two separate steps: first the rebase itself, followed by a fast-forward merge:\ngit switch feature-branch\ngit rebase main\n\nThis step rebases our feature-branch on top of main, meaning that we pretend that the commits on feature-branch were done based on the latest commits on main - you can also think of it as moving the entire feature-branch to the tip of the main branch. The commits with the dashed borders here indicate brand new commits; rebasing can’t somehow move the commits to the new base, rather it has to “replay” those commits as if they were done on the new base.\ngit switch main\ngit merge feature-branch\n\nWe’ve now got our feature-branch commits onto main with a single, linear history without any merge commits! We did have to rewrite history, though, when we did the rebase itself. As with amending (see above), this is fine if we’re only working locally, but we’ll quickly run into trouble if we try to rebase things that have already been pushed. We can rebase on top of remote things, of course, since we’re not changing any remote history, only the local history. Be careful when you rebase!\n\n\n9.4 Rebasing as clean-up\nIf the above section felt scary, don’t worry! There’s another highly useful use-case for git rebase that doesn’t risk destroying any history, namely local clean-up!\nLet’s imagine you’ve worked on your local feature-branch for some time, and you have a number of commits on it. Some are highly related to each other and might actually be better suited as a single commit. You’ve also spotted a spelling error in one commit message, and realised that you missed important information in another. We can actually solve all of these issues with an interactive rebase! If you have 4 commits on your branch you can type the following:\ngit rebase -i HEAD~4\nThe -i flag means interactive, while HEAD~4 means 4 commits back from HEAD. This will open your default text editor and give you a selection looking something like this:\npick 0abf162 First feature commit\npick befc682 A minor change on the first commit\npick c9d1426 A commit with an uncomplete commit message\npick 2e0cb97 A commit with a spelling mitake\n\n# Rebase 879ddcc..0abf162 onto 879ddcc (4 commands)\n#\n# Commands:\n# p, pick <commit> = use commit\n# r, reword <commit> = use commit, but edit the commit message\n# e, edit <commit> = use commit, but stop for amending\n# s, squash <commit> = use commit, but meld into previous commit\n\n(... more instructions ...)\nThe commits are ordered with the most recent one at the bottom. The commented instructions (all of which are not shown here) show you what alternatives you have to work with; all you have to do is to change the pick keyword next to the commit hashes to whatever keyword you need from the list, save and exit.\nIn order to solve the toy example here we might decide that the four keywords should be pick, squash, reword and reword, from top to bottom. Once that’s done simply save and exit, and another instance of your default text editor will open for you to complete the specified changes. In the case above we’d get two separate new instances where we can change the commit message - these work the same as any normal commit.\nInteractive rebasing is thus well-suited for fixing and cleaning of local changes you have yet to push anywhere, even if you don’t use rebasing as an alternative to merging! 
This can make your Git history both cleaner and more concise, which is great when you’re collaborating with others.\n\n\n9.5 Resetting\nSometimes you’ll want to simply discard changes you’ve already committed. This should, however, be something that you rarely have to do. Completely moving back to a previous commit is something called a hard reset, which can be accomplished like so:\ngit reset --hard 5b83463\nYou specify the commit you wish to return to, discarding all other changes, including any changes done to the working directory. It goes without saying that this command is among the most dangerous commands available in Git and should be used with caution.\n\n\n9.6 The reflog\nWe have shown many ways to work with Git and its various commands, and it occasionally happens that errors are introduced - especially when you’re not careful with using git commit --amend, git rebase or git reset on remote changes. This is where the reflog comes in. Think of the reflog as Git’s “safety net”: it stores almost every change you make to a Git repository (regardless of whether you commit the change) in a chronological manner. The following is an example of what the output of the git reflog command might show:\n58deba6 HEAD@{0}: merge: feature-branch: Fast-forward\n8c80c88 HEAD@{1}: checkout: moving from feature-branch to main\n555544a HEAD@{2}: commit: feature development 2\n4c92630 HEAD@{3}: commit: feature development 1\n8c80c88 HEAD@{4}: checkout: moving from main to feature-branch\nIt shows the most recent change at the top, notified by HEAD@{0}. We thus have a merging of feature-branch into main, a checkout (switch) into main, two commits on feature-branch and a checkout into feature-branch - reading it backwards we get a chronological log of what has happened.\nThe reflog is incredibly useful for when you’ve lost something you later realise you want to access again, such as when you’ve just used git reset. The reflog might look like this, for example:\nbc3641f HEAD@{0}: reset: moving to HEAD~2\ncaf9321 HEAD@{1}: commit: More work on the feature\n1bc36af HEAD@{2}: commit: Work on a new feature\nWe see two commits related to some new feature and a reset to HEAD~2 (two commits back from HEAD). If we realise that we actually liked the work we just threw away we can move around in the reflog in a similar manner we do normal commits:\ngit reset HEAD@{1}\nThis will put us back to the state we were in before we used git reset. We here refer to the reflog using the HEAD@{N} notation, which differs from the usual HEAD~N notation so that it is clear if it is the commit history or the reflog that is intended. While the reflog is hopefully not something you’ll have to use often it’s quite useful to know it exists, if only to be able to search the internet for more details regarding a problem you’ve encountered!\n\n\n9.7 Decorating your prompt\nWhen you are working on the command line interface (CLI), you will usually have some small pieces of information relating to your current directory, the name of the computer or host you’re working on, and so forth. You’ve probably already seen your prompt while working with Git throughout this lesson, but here’s an example of what one might look like:\nerikfmbp:~/teaching/workshop-reproducible-research erik.fasterius $\nThe above prompt contains the name of the computer, a colon, the current working directory, the username and a dollar-sign; it is stored in the variable PS1. 
You can type echo $PS1 to see what variables your prompt is made up of; the above example contains \\h:\\W \\u\\$, where \\h is the hostname, \\W the working directory and \\u the username.\n\n\n\n\n\n\nNote\n\n\n\nIf you’re using zsh instead of bash you’ll have to replace the backslashes (\\) in the commands with percent signs (%).\n\n\nSome people like to also show the current branch on their prompt, thus avoiding having to type git branch continuously. There are several ways you might do this, and we’re only presenting one of them here: a bash function.\ngit_branch() {\n git branch 2> /dev/null | sed -e '/^[^*]/d' -e 's/* \\(.*\\)/ (\\1)/'\n}\nThis function does a number of things:\n\nEjects the error message from Git if the current directory isn’t a part of a Git repository into /dev/null (i.e. into nothing).\nFind the current branch by searching for a line that starts with * (i.e. the current branch) using the command line program sed.\nPut the current branch into parentheses with a space before it.\n\nWe can then build our new prompt by adding this function into it:\n# The first part of the old prompt\nPS1='\\h:\\W \\u'\n\n# Add the Git branch\nPS1=$PS1'$(git_branch)'\n\n# Add the last part of the old prompt\nPS1=$PS1' \\$'\nNow you should see the current Git branch on your prompt! The only problem now is that this only works for your current session: once you restart your CLI you’ll have to re-define your prompt again. This can be circumvented, though. What you need to do is to add the code defining your prompt into your so-called bash profile: ~/.bash_profile. Every time you load a new CLI session this file is read and any code inside it is executed. You might already have this file, so make sure you don’t overwrite it!\n\n\n9.8 Bash aliases for git\nSome Git commands are used over and over again when working with git, such as git status. Some people like to have aliases (i.e. shortcuts) for these common commands. Here is a small list of such aliases that you may find useful or, even better, might inspire you to create your own! Add them to your ~/.bash_profile as above, so that they’re available across sessions.\n# Basic Git commands\nalias ga='git add'\nalias gb='git branch'\nalias gc='git commit'\nalias gd='git diff'\nalias gl='git log'\nalias gm='git merge'\nalias gp='git push'\nalias gt='git tag'\nalias gu='git pull'\nalias gw='git switch'\n\n# Git status in short format\nalias gs='git status --short'\n\n# Show diff of staged files\nalias gds='git diff --staged'\n\n# Add and commit all tracked and modified files\nalias gca='git commit --all'\n\n# Create and switch to a new branch\nalias gwc='git switch --create'\n\n# Git log with one line per commit\nalias glo='git log --oneline'\n\n\n9.9 Pretty logs\nIf you want to customise e.g. the format and the colours of the logs you can use the gitconfig file (the same one we added things to using git config --global user.name \"Mona Lisa\" in the pre-course setup). You can read more about exactly what you can do at the documentation for Git configs and pretty formats, but we’ll provide two examples here:\n[format]\n pretty = format:%C(yellow)commit %H %C(auto)%d %nAuthor: %C(cyan)%aN %C(italic reset)(%ae) %nDate: %C(blue)%ar %C(italic reset)(%ai) %n%n%C(bold reset)%w(0,6,6)%s%n%C(reset)%+b\nThis first example alters the format of the default git log command. It looks similar to what you’d be used to seeing with that command, except his has some colour highlights and adds the relative date (e.g. 
“1 hour ago” and similar relative times).\n[pretty]\n line = format:%C(yellow)%h %C(blue)%>(12)%ar %C(cyan)%aN%C(auto)%d %C(reset)%s\nThis second example is a custom format that can be called using git log --pretty=<format-name>, and is similar to the built-in --oneline flag, but also contains nicer colours, the relative date as well as the author name; the format name line here is used for its similarity to oneline. You can add any number of custom formats you like using such config specifications. If you’re using aliases as in the section above you might change the glo alias to be git log --pretty=line instead, which will give you the nicer log on one line." }, { "objectID": "pages/jupyter.html", "href": "pages/jupyter.html", "title": "Working with Jupyter", "section": "", "text": "The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain code, equations, visualizations and text. The functionality is partly overlapping with Quarto (see the tutorial), in that they both use markdown and code chunks to generate reports that integrate results of computations with the code that generated them. Jupyter Notebook comes from the Python community while Quarto was developed by Posit (who also created R Markdown and RStudio), but you could use most common programming languages in either alternative. In practice though, it’s quite common that R developers use Jupyter but probably not very common that Python developers use RStudio. Some reasons to use Jupyter include:\n\nPython is lacking a really good IDE for doing exploratory scientific data analysis, like RStudio or Matlab. Some people use Jupyter simply as an alternative for that.\nThe Jupyter Project community is large and dynamic, and there are lots of tools for sharing, displaying or interacting with notebooks.\nAn early ambition with Jupyter notebooks (and its predecessor IPython notebooks) was to be analogous to the lab notebook used in a wet lab. 
It would allow the data scientist to document his or her day-to-day work and interweave results, ideas, and hypotheses with the code. From a reproducibility perspective, this is one of the main advantages.\nJupyter notebooks can be used, just like Quarto, to provide a tighter connection between your data and your results by integrating results of computations with the code that generated them. They can also do this in an interactive way that makes them very appealing for sharing with others.\n\nAs always, the best way is to try it out yourself and decide what to use it for!\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. Then open up a terminal and go to workshop-reproducible-research/tutorials/jupyter and activate your jupyter-env Conda environment.\n\n\n\n\n\n\nA note on nomenclature\n\n\n\n\nJupyter: a project to develop open-source software, open-standards, and services for interactive computing across dozens of programming languages. Lives at jupyter.org.\nJupyter Notebook: A web application that you use for creating and managing notebooks. One of the outputs of the Jupyter project.\nJupyter lab: A more powerful and feature-rich interface that also includes a terminal, debugger, tabs etc.\nJupyter notebook: The actual .ipynb file that constitutes your notebook." + }, + { + "objectID": "pages/jupyter.html#introduction", + "href": "pages/jupyter.html#introduction", + "title": "Working with Jupyter", + "section": "", + "text": "The Jupyter Notebook is an open-source web application that allows you to create and share documents that contain code, equations, visualizations and text. The functionality is partly overlapping with Quarto (see the tutorial), in that they both use markdown and code chunks to generate reports that integrate results of computations with the code that generated them. Jupyter Notebook comes from the Python community while Quarto was developed by Posit (who also created R Markdown and RStudio), but you could use most common programming languages in either alternative. In practice though, it’s quite common that R developers use Jupyter but probably not very common that Python developers use RStudio. Some reasons to use Jupyter include:\n\nPython is lacking a really good IDE for doing exploratory scientific data analysis, like RStudio or Matlab. Some people use Jupyter simply as an alternative for that.\nThe Jupyter Project community is large and dynamic, and there are lots of tools for sharing, displaying or interacting with notebooks.\nAn early ambition with Jupyter notebooks (and its predecessor IPython notebooks) was to be analogous to the lab notebook used in a wet lab. It would allow the data scientist to document his or her day-to-day work and interweave results, ideas, and hypotheses with the code. From a reproducibility perspective, this is one of the main advantages.\nJupyter notebooks can be used, just like Quarto, to provide a tighter connection between your data and your results by integrating results of computations with the code that generated them. They can also do this in an interactive way that makes them very appealing for sharing with others.\n\nAs always, the best way is to try it out yourself and decide what to use it for!\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already. 
Then open up a terminal and go to workshop-reproducible-research/tutorials/jupyter and activate your jupyter-env Conda environment.\n\n\n\n\n\n\nA note on nomenclature\n\n\n\n\nJupyter: a project to develop open-source software, open-standards, and services for interactive computing across dozens of programming languages. Lives at jupyter.org.\nJupyter Notebook: A web application that you use for creating and managing notebooks. One of the outputs of the Jupyter project.\nJupyter lab: A more powerful and feature-rich interface that also includes a terminal, debugger, tabs etc.\nJupyter notebook: The actual .ipynb file that constitutes your notebook." + }, + { + "objectID": "pages/jupyter.html#the-basics", + "href": "pages/jupyter.html#the-basics", + "title": "Working with Jupyter", + "section": "2 The basics", + "text": "2 The basics\nOne thing that sets Jupyter Notebook apart from what you might be used to is that it’s a web application, i.e. you edit and run your code from your browser. But first you have to start the Jupyter Notebook server. At this point you may either try the classic notebook interface by running:\njupyter notebook --allow-root\nOr give the more feature-rich Jupyter lab interface a try by running:\njupyter lab --allow-root\nWhichever interface you choose you should see something similar to this printed to your terminal:\n[I 18:02:26.722 NotebookApp] Serving notebooks from local directory: /Users/john/workshop-reproducible-research/tutorials/jupyter\n[I 18:02:26.723 NotebookApp] 0 active kernels\n[I 18:02:26.723 NotebookApp] The Jupyter Notebook is running at:\n[I 18:02:26.723 NotebookApp] http://localhost:8888/?token=e03f10ccb40efc3c6154358593c410a139b76acf2cae000\n[I 18:02:26.723 NotebookApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).\n[C 18:02:26.724 NotebookApp]\n\n Copy/paste this URL into your browser when you connect for the first time,\n to login with a token:\n http://localhost:8888/?token=e03f10ccb40efc3c6154358593c410a139b76acf2cae785c\n[I 18:02:27.209 NotebookApp] Accepting one-time-token-authenticated connection from ::1\n\n\n\n\n\n\nA note for Windows users\n\n\n\nIf you see the error message Start : This command cannot be run due to the error: The system cannot find the file specified. ... then try starting Jupyter with jupyter notebook --no-browser then copy the URL given into the browser directly.\n\n\n\n\n\n\n\n\nJupyter notebook versions\n\n\n\nDepending on what version of the notebook conda package you have installed, the interface may look slightly different. The screenshots in this tutorial are from version 7, an update which has brought the ‘classic’ Jupyter notebook closer to the Jupyter lab interface. Read more about this update at the Jupyter homepage.\n\n\nThe Jupyter Notebook/Lab interface probably opened up a web browser for you automatically, otherwise go to the address specified in the message in the terminal. Note that the server is running locally (as http://localhost:8888) so this does not require that you have an active internet connection. Also note that it says:\nServing notebooks from local directory: </some/local/path/workshop-reproducible-research/tutorials/jupyter>\nEverything you do in your Notebook session will be stored in this directory, so you won’t lose any work if you shut down the server.\n\n\n\nWhat you’re looking at is the Notebook dashboard. This is where you manage your files, notebooks, and kernels. The Files tab shows the files in your directory. 
The Running tab keeps track of all your processes.\nThe Jupyter lab dashboard should look something like this:\n\n\n\nLet’s start by creating an empty notebook. You can do this by selecting the Files tab and clicking New > Notebook. When the notebook opens, select the suggested Python 3 kernel from the drop-down menu.\nThis will open up a new tab or window looking like this:\n\n\n\nStart by giving your notebook a name by clicking on the text “Untitled” at the top of the page. Enter “jupyter-tutorial.ipynb”.\nNote that for most of this tutorial we will describe how you work in the actual notebook and not devote a lot of time to the extra features available in the Jupyter lab interface.\n\n\n\n\n\n\nTip\n\n\n\nIf you want to start Jupyter Notebooks on a cluster that you SSH to (e.g. Uppmax) see the section in the Extra material\n\n\nJupyter notebooks are made up of cells, and you are currently standing in the first cell in your notebook. Your cursor should be blinking in this cell, indicating that you are in “Edit mode”, meaning that you can type text in the cell. Pressing the Esc key puts you in “Command mode”, which allows you to manipulate the notebook as a whole; more on this later.\nCells in Jupyter notebooks can be of two types: markdown or code.\n\nMarkdown:\n\nThese cells contain static material such as captions, text, lists, images and so on. You express this using Markdown, which is a lightweight markup language. Markdown documents can then be converted to other formats for viewing (the document you’re reading now is written in Markdown and then converted to HTML). The format is discussed a little more in detail in the Quarto tutorial. Jupyter Notebook uses a dialect of Markdown called GitHub Flavoured Markdown, which is described here.\n\nCode:\n\nThese are the cells that actually do something, just as code chunks do in Quarto/R Markdown. You can write code in dozens of languages and do all kinds of clever tricks. You then run the code cell and any output the code generates, such as text or figures, will be displayed beneath the cell. We will get back to this in much more detail, but for now it’s enough to understand that code cells are for executing code that is interpreted by a kernel (in this case the Python version in your Conda environment).\nBefore we continue, here are some shortcuts that can be useful. Note that they are only applicable when in “Command mode”. Most of them are also available from the menus. You can also view this list of shortcuts from the Help menu under “Show Keyboard Shortcuts”.\n\n\n\nShortcut\nEffect\n\n\n\n\nenter\nEnter Edit mode\n\n\nescape\nEnter Command mode\n\n\nctrl-enter\nRun the cell\n\n\nshift-enter\nRun the cell and select the cell below\n\n\nalt-enter\nRun the cell and insert a new cell below\n\n\ns\nSave the notebook\n\n\ntab\nFor code completion or indentation\n\n\nm,y\nToggle between Markdown and Code cells\n\n\nd-d\nDelete a cell\n\n\na\nInsert cells above current cell\n\n\nb\nInsert cells below current cell\n\n\nx\nCut currently selected cells\n\n\nv\nPaste cell below\n\n\no\nToggle output of current cell" }, { "objectID": "pages/jupyter.html#writing-markdown", "href": "pages/jupyter.html#writing-markdown", "title": "Working with Jupyter", "section": "3 Writing markdown", "text": "3 Writing markdown\nLet’s use our first cell to create a header. Change the format from Code to Markdown using the drop-down list in the Notebook Toolbar, or by pressing the m key when in command mode. 
Double click on the cell, or hit enter to enter editing mode and input “# My notebook” (“#” is used in Markdown for header 1). Run the cell with ctrl-enter (cmd-enter on Mac).\nMarkdown is a simple way to structure your notebook into sections with descriptive notes, lists, links, images etc.\nBelow are some examples of what you can do in markdown. Paste all or parts of it into one or more cells in your notebook to see how it renders. Make sure you set the cell type to Markdown.\n## Introduction\nIn this notebook I will try out some of the **fantastic** concepts of Jupyter\nNotebooks.\n\n## Markdown basics\nExamples of text attributes are:\n\n- *italics*\n- **bold**\n- `monospace`\n\nSections can be separated by horizontal lines.\n\n---\n\nBlockquotes can be added, for instance to insert a Monty Python quote:\n\n> Spam!\n> Spam!\n> Spam!\n> Spam!\n\nSee [here](https://jupyter-notebook.readthedocs.io/en/stable/examples/Notebook/Working%20With%20Markdown%20Cells.html) for more information." }, { "objectID": "pages/jupyter.html#writing-code", "href": "pages/jupyter.html#writing-code", "title": "Working with Jupyter", "section": "4 Writing code", "text": "4 Writing code\nNow let’s write some code! Since we chose a Python kernel, Python would be the native language to run in a cell. Enter this code in the second cell and run it:\nprint(\"Hello world!\")\nNote how the output is directly displayed below the cell. This interactive way of working is one of the things that sets Jupyter Notebook apart from RStudio and Quarto. In RStudio/Quarto, documents are typically rendered top-to-bottom in one run, while you work in a Jupyter notebook in a different way. This requires some special attention when it comes to reproducibility, which we will get back to in the reproducibility section.\nWhat is a Jupyter notebook? Let’s take a closer look at the notebook we’re currently working in. Jupyter Notebooks are autosaved every minute or so, so you will already have it available. We can be a little meta and do this from within the notebook itself, by running some shell commands in a code cell. This very handy functionality is possible by prepending the command with !. Try adding !ls to a code cell and run it. This will list the files in the current directory.\nAha, we have a new file called jupyter-tutorial.ipynb! This is our notebook. Look at the first ten lines of the file by using !head jupyter-tutorial.ipynb. Seems like it’s just a plain old JSON file. Since it’s a text file it’s suitable for version control with for example Git. There are however some special considerations to take into account for Notebooks which we will cover in the reproducibility section of this tutorial.\nVariables defined in cells become variables in the global namespace. You can therefore share information between cells. Try to define a function or variable in one cell and use it in the next. For example, add the following to a new cell and run it:\ndef print_me(str):\n print(str)\nNow create a new cell and add:\nprint_me(\"Hi!\")\nYour notebook should now look something like this.\n\n\n\nThe focus of this tutorial is not on how to write Markdown or Python; you can make really pretty notebooks with Markdown and you can code whatever you want with Python. 
Rather, we will focus on the Jupyter Notebook features that allow you to do a little more than that.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nThat a Jupyter notebook consists of a series of cells, and that they can be either markdown or code cells.\nThat we execute the code in a code cell with the kernel that we chose when opening the notebook.\nWe can run shell commands by prepending them with !.\nA Jupyter notebook is simply a text file in JSON format." + }, + { + "objectID": "pages/jupyter.html#magics", + "href": "pages/jupyter.html#magics", + "title": "Working with Jupyter", + "section": "5 Magics", + "text": "5 Magics\nMagics constitute a simple command language that significantly extends the power of Jupyter notebooks. There are two types of magics:\n\nLine magics: Commands that are prepended by %, and whose arguments only extend to the end of the line.\nCell magics: Commands that start with %% and then apply to the whole cell. Must be written on the first line of a cell.\n\nNow list all available magics with %lsmagic (which itself is a magic). You add a question mark to a magic to show the help (e.g. %lsmagic?). Some of them act as shortcuts for commonly used shell commands (%ls, %cp, %cat, ..). Others are useful for debugging and optimizing your code (%timeit, %debug, %prun, ..). For more information see the magics documentation.\nA very useful magic, in particular when using shell commands a lot in your work, is %%capture. This will capture the stdout/stderr of any code cell and store them in a Python object. Run %%capture? to display the help and try to understand how it works. Try it out with either some Python code, other magics or shell commands. Here is an example of how you can make it work:\n%%capture output\n%%bash\necho \"Print to stdout\"\necho \"Print to stderr\" >&2\n… And in another cell:\nprint(\"stdout:\" + output.stdout)\nprint(\"stderr:\" + output.stderr)\n\nTip You can capture the output of some magics directly like this: my_dir = %pwd.\n\nThe %%script magic is used for specifying a program (Bash, Perl, Ruby, ..) with which to run the code (similar to a shebang). For some languages it’s possible to use these shortcuts:\n\n%%ruby\n%%perl\n%%bash\n%%html\n%%latex\n%%R\n\n\n\n\n\n\n\nA note on R code\n\n\n\nIn order to use the %%R magic you need to install the rpy2 extension, for example with Conda. This package is already installed in the jupyter-env environment you’re using for this tutorial. However, you also have to load it by running %load_ext rpy2.ipython in a cell.\n\n\nTry this out if you know any of the languages above. Otherwise you can always try to print the quadratic formula with LaTeX!\n\\begin{array}{*{20}c} {x = \\frac{{ - b \\pm \\sqrt {b^2 - 4ac} }}{{2a}}} & {{\\rm{when}}} & {ax^2 + bx + c = 0} \\\\ \\end{array}\nAnother useful magic is %precision which sets the floating point precision in the notebook. 
As a quick example, add the following to a cell and run it:\nfloat(100/3)\nNext set the precision to 4 decimal points by running a cell with:\n%precision 4\nNow run the cell with float(100/3) again to see the difference.\nRunning %precision without additional arguments will restore the default.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nThe basics of Jupyter magics and the difference between line magics and cell magics\nHow to capture and use output from notebook cells with %%capture\nHow to use magics to run non-Python code in notebooks" + }, + { + "objectID": "pages/jupyter.html#plotting", + "href": "pages/jupyter.html#plotting", + "title": "Working with Jupyter", + "section": "6 Plotting", + "text": "6 Plotting\nAn essential feature of Jupyter Notebooks is of course the ability to visualize data and results via plots. A full guide to plotting in Python is beyond the scope of this course, but we’ll offer a few glimpses into the plotting landscape of Python.\nFirst of all, Python has a library for plotting called matplotlib, which comes packed with functionality for creating high-quality plots. Below is an example of how to generate a line plot of a sine wave.\n# Import packages\nimport numpy as np\nimport matplotlib.pyplot as plt\n# Generate a set of evenly spaced numbers between 0 and 100\nx = np.linspace(0,3*np.pi,100)\n# Use the sine function to generate y-values\ny = np.sin(x)\n# Plot the data\nline, = plt.plot(x, y, color='red', linestyle=\"-\")\nBy default plots are rendered in the notebook as rasterised images which can make the quality poor. To render in scalable vector graphics format use the set_matplotlib_formats from the matplotlib_inline package:\nimport matplotlib_inline\nmatplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'svg')\nNow try running the code for the sine wave plot again." + }, + { + "objectID": "pages/jupyter.html#other-packages-for-plotting", + "href": "pages/jupyter.html#other-packages-for-plotting", + "title": "Working with Jupyter", + "section": "7 Other packages for plotting", + "text": "7 Other packages for plotting\nAs we mentioned Matplotlib comes with a lot of functionality which is great because it allows you to create all sorts of plots and modify them exactly to your liking. However, this can also mean that creating very basic plots might involve a lot of cumbersome coding, when all you want is a simple bar chart!\nFortunately there are a number of Python packages that build upon matplotlib but with a much simplified interface. One such popular package is Seaborn. Below we’ll see how to generate a nice looking bar plot with error bars.\nFirst import the Seaborn package (using an abbreviated name to simplify typing):\nimport seaborn as sns\nNext we’ll load some example data of penguins collected at the Palmer Station, in Antarctica.\npenguins = sns.load_dataset(\"penguins\")\n# Look at first 5 lines of the data\npenguins.head(5)\nThe most basic way to generate a bar plot of this data with Seaborn is:\nsns.barplot(data=penguins)\nSimple right? Yes, but maybe not very informative. 
Here Seaborn simply calculates the mean of all numeric variables for the penguins and plots them with error bars representing a 95% confidence interval.\nLet’s say that instead we want to plot the mean value of the body mass of the penguins at the different islands where they were examined.\nsns.barplot(data=penguins, x=\"island\", y=\"body_mass_g\", errorbar=\"sd\");\nHere we specified to use values in the ‘island’ column as categories for the x-axis, and values in the ‘body_mass_g’ column as values for the y-axis. The barplot function of Seaborn will then calculate the mean body mass for each island and plot the bars. With errorbar=\"sd\" we tell the function to draw the standard deviation as error bars, instead of computing a confidence interval.\nIf we instead want to visualize the data as a scatterplot we can use the sns.scatterplot function. Let’s plot the body mass vs bill length for all penguins and colour the data points by species. We’ll also move the legend outside of the plotting area and modify the x and y-axis labels:\n# Store the matplotlib axes containing the plot in a variable called 'ax'\nax = sns.scatterplot(data=penguins, x=\"bill_length_mm\", y=\"body_mass_g\",\n hue=\"species\")\n# Modify the labels of the plot\nax.set_xlabel(\"Bill length (mm)\")\nax.set_ylabel(\"Body mass (g)\")\n# Set legend position outside of plot\nax.legend(bbox_to_anchor=(1,1));\nIf you want to save a plot to file you can use the plt.savefig function. Add the following to the bottom of the cell with the scatterplot code:\nplt.savefig(\"scatterplot.pdf\", bbox_inches=\"tight\")\nThe bbox_inches=\"tight\" setting ensures that the figure is not clipped when saved to file.\nThe Seaborn website contains great tutorials and examples of other ways to plot data!\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to generate simple plots with matplotlib\nHow to import and use the Seaborn package for plotting\nHow to save plots from notebooks to a file" + }, + { + "objectID": "pages/jupyter.html#widgets", + "href": "pages/jupyter.html#widgets", + "title": "Working with Jupyter", + "section": "8 Widgets", + "text": "8 Widgets\nSince we’re typically running our notebooks in a web browser, they are quite well suited for also including more interactive elements. A typical use case could be that you want to communicate some results to a collaborator or to a wider audience, and that you would like them to be able to modify how the results are displayed. It could, for example, be to select which gene to plot for, or to see how some parameter value affects a clustering. Jupyter notebooks have great support for this in the form of widgets.\nWidgets are eventful Python objects that have a representation in the browser, often as a control like a slider, text box, etc. These are implemented in the ipywidgets package.\nThe easiest way to get started with using widgets is via the interact and interactive functions. These functions auto-generate widgets from functions that you define, and then call those functions when you manipulate the widgets. This might sound abstract so let’s look at an example.\nLet’s take the scatterplot of the penguins dataset that we generated in the previous section and add widgets that let us choose variables to plot as well as coloring of the points.\nFirst we’ll import the interactive function from ipywidgets. Let’s also import the widgets module which we’ll use later. 
Add the following code to a cell and run it:\nfrom ipywidgets import interactive, widgets\nNow, in a new cell, define a function called scatterplot with the code to generate the plot itself. Also add a palette argument to the function so that we can specify the colour palette to use for the plot. The function should look like this:\ndef scatterplot(x, y, hue, palette):\n ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette)\nRun the cell and create a new cell below it.\nNext, we’ll use the interactive function to generate a widget to control the x, y, hue and palette arguments. The interactive function takes a function as its first argument, and then keyword arguments for each of the arguments in the function. The returned value is a widget which we will store in a variable called interactive_scatterplot. Add the following to a cell and run it:\ninteractive_scatterplot = interactive(scatterplot,\n x=[\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\",\"body_mass_g\"],\n y=[\"body_mass_g\",\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\"],\n hue=[\"species\",\"island\",\"sex\"],\n palette=[\"Set1\",\"Set2\",\"Dark2\",\"Paired2\"])\nImportantly, all parameters defined in the scatterplot function must be given in the interactive call. The interactive_scatterplot widget is now tied to the scatterplot function. However, we still haven’t displayed the widget itself. To do that, simply add interactive_scatterplot to a new cell and run it:\ninteractive_scatterplot\nThis should show the scatterplot with drop-down menus for each of the arguments. Try changing the x and y variables to plot by selecting from the respective drop-downs. The hue drop-down now lets you change what variable to use for colouring the points and the palette drop-down changes the colour palette. As you can see, the available options in the drop-downs are the ones we specified in the interactive call.\nDepending on the type of the passed argument different types of widgets will be created by interactive. For instance:\n\nint or float arguments will generate a slider\nbool arguments (True/False) will generate checkbox widgets\nlist arguments will generate a drop-down\nstr arguments will generate a text-box\n\nLet’s add a slider to control the size of the points. In the Seaborn package this is controlled by the s argument to the scatterplot function. Modify the cell with your scatterplot function so it looks like this (remember to run the cell in order to update the function definition):\ndef scatterplot(x, y, hue, palette, size=50):\n ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette, s=size)\nNote that we added a size argument to the function and supplied it to the Seaborn scatterplot call with s=size. Setting size=50 in the function definition means that the default size of the points will be 50.\nNow we need to add a slider for the size argument. 
Update the cell where we call the interactive function so that it looks like this, then run it:\ninteractive_scatterplot = interactive(scatterplot,\n x=[\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\",\"body_mass_g\"],\n y=[\"body_mass_g\",\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\",],\n hue=[\"species\",\"island\",\"sex\"],\n palette=[\"Set1\",\"Set2\",\"Dark2\",\"Paired2\"],\n size=(20,100,10))\nHere the size argument is defined as a tuple which sets the minimum value of the slider to 20, the maximum value to 100 and the step size to 10.\nFinally, re-run the cell where we displayed the interactive_scatterplot widget. You should now see a slider for the size argument (starting at 50). Try changing the size of the points by moving the slider.\nThis is how it should look if everything works.\n\nThere are lots of widgets, e.g.:\n\nDrop-down menus\nToggle buttons\nRange sliders\nFile uploader\n\n… And much, much more. Here is a list of all available widgets together with documentation and examples. Some of these widgets cannot be auto-generated by interactive, but fear not! Instead of relying on auto-generation we can define the widget and supply it directly to interactive.\nTo see this in practice, we’ll modify the scatterplot function to display a title and add a color picker widget that let’s us set the color of the title text.\nFirst, update the scatterplot function so that it looks like this:\ndef scatterplot(x, y, hue, palette, size, color):\n ax = sns.scatterplot(data=penguins, x=x, y=y, hue=hue, palette=palette, s=size)\n ax.set_title(\"Penguin scatterplot\", color=color)\nThen run the cell to update the function definition.\nNext, we’ll define the colour picker widget. Add the definition to the cell where you defined the interactive_scatterplot then supply the widget to the interactive call. The cell should look like this:\ncolorpicker = widgets.ColorPicker(\n concise=False,\n description='Title color',\n value='blue',\n disabled=False\n)\ninteractive_scatterplot = interactive(scatterplot,\n x=[\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\",\"body_mass_g\"],\n y=[\"body_mass_g\",\"bill_length_mm\",\"bill_depth_mm\",\"flipper_length_mm\"],\n hue=[\"species\",\"island\",\"sex\"],\n palette=[\"Set1\",\"Set2\",\"Dark2\",\"Paired2\"],\n size=(20, 100, 10),\n color=colorpicker)\nRun the cell to update the widgets.\nFinally, re-run the cell where we displayed the interactive_scatterplot. The plot should now have a title and you should see a new color picker below the slider for the point size. Try changing the title colour by clicking on the new color picker.\n\n\n\n\n\n\nCaution\n\n\n\nNote that you may have to close the colour picker once you’ve made your choice in order to make the plot update." + }, + { + "objectID": "pages/jupyter.html#other-interactive-plots", + "href": "pages/jupyter.html#other-interactive-plots", + "title": "Working with Jupyter", + "section": "9 Other interactive plots", + "text": "9 Other interactive plots\nJupyter widgets, like we used here, is the most vanilla way of getting interactive graphs in Jupyter notebooks. Some other alternatives are:\n\naltair is a plotting library that uses Vega-Lite grammar which is reminiscent of ggplot2 in R. The syntax is different from what we’ve shown here, but it’s very powerful once you get the hang of it.\nPlotly is actually an API to a web service that renders your graph and returns it for display in your Jupyter notebook. 
Generates very visually appealing graphs, but from a reproducibility perspective it’s maybe not a good idea to be so reliant on a third party.\nBokeh is another popular tool for interactive graphs. Most plotting packages for Python are built on top of matplotlib, but Bokeh has its own library. This can give a steeper learning curve if you’re used to the standard packages.\nmpld3 tries to integrate matplotlib with Javascript and the D3js package. It doesn’t scale well for very large datasets, but it’s easy to use and works quite seamlessly.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to implement interactive widgets in notebooks" + }, + { + "objectID": "pages/jupyter.html#extensions", + "href": "pages/jupyter.html#extensions", + "title": "Working with Jupyter", + "section": "10 Extensions", + "text": "10 Extensions\nJupyter Notebook extensions are add-ons that can increase the functionality of your notebooks. Extensions include themes, editors, git support, renderers and much more. The most user-friendly way of managing extensions is via the Extension Manager available in the Jupyter lab interface. You can access it by clicking the puzzle piece icon in the left sidebar.\n\n\n\n\n\n\nCaution\n\n\n\nNote that the extension manager contains a disclaimer warning you that these third-party extensions are not reviewed for vulnerabilities which means that you should be careful about what extensions you install.\n\n\nYou can use the search field to perform a free text search for available extensions, then click ‘Install’ to install an extension. Note that in some cases you will be prompted to install additional packages.\nWhile an in-depth listing of available extensions is well beyond the scope of this tutorial we offer this list of a few extensions that are of particular relevance to this course:\n\nJupyterlab/GitHub - view and open files from GitHub\nJupyterlab/Git - version controlling with git\nmamba-org/gator-lab - manage Conda environments\nvoila-dashboards/Jupyterlab-preview - preview a rendered version of your notebook\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nWhat Jupyter extensions are and how to manage them" + }, + { + "objectID": "pages/jupyter.html#reproducibility", + "href": "pages/jupyter.html#reproducibility", + "title": "Working with Jupyter", + "section": "11 Reproducibility", + "text": "11 Reproducibility\nNow that you have a feeling for what Jupyter can do we’ll spend a little time on things to consider specifically from a reproducibility point of view when it comes to Jupyter notebooks." + }, + { + "objectID": "pages/jupyter.html#version-control-of-jupyter-notebooks", + "href": "pages/jupyter.html#version-control-of-jupyter-notebooks", + "title": "Working with Jupyter", + "section": "12 Version control of Jupyter notebooks", + "text": "12 Version control of Jupyter notebooks\nAs we’ve seen, Jupyter notebooks are plain-text JSON files. This means that they can be version controlled with Git just like any other text file. However, because of the way Jupyter notebooks store their content, the diffs produced by Git can be difficult to interpret. Luckily, there are tools that can provide content-aware diffs and merge functionality for Jupyter notebooks.\nOne such tool is nbdime. nbdime is built to understand the structure of Jupyter notebooks and can therefore generate diffs that are easier to read. 
It can also be used to merge changes made to notebooks, which is great especially when collaborating on notebooks with others.\nnbdime is already installed in the jupyter-env Conda environment you are using for this tutorial. To try it in action, create a new notebook and name it Analysis.ipynb. Add the following code to the first cell, then run it:\nimport numpy as np\nimport seaborn as sns\npenguins = sns.load_dataset(\"penguins\")\nThis simply imports some python modules and loads a dataset.\nSave the notebook. Now we’ll add and commit the new notebook to the Git repository:\ngit add Analysis.ipynb\ngit commit -m \"Add Analysis notebook\"\nSo far so good. And nothing new here compared to what we’ve already learned about version control. Now let’s make some changes to the notebook. First we’ll replace one of the loaded modules. Update the first cell of the notebook so that it reads:\nimport pandas as pd\nimport seaborn as sns\npenguins = sns.load_dataset(\"penguins\")\nThen create a new cell where we’ll calculate the mean of each numeric value grouped by species. In the new cell, add the following code:\npenguins.groupby(\"species\").mean(numeric_only=True)\nRun the cell and save the notebook.\nNow use git diff to view the changes we’ve made to the notebook. Run:\ngit diff Analysis.ipynb\nEven with very minor modifications to the notebook the diff will contain numerous lines that are difficult to interpret. This is because the notebook not only contains the code, but also cell metadata and output (in this case a table produced by the second cell).\nNow let’s generate a more easy-to-read diff. Run:\nnbdiff -s Analysis.ipynb\nThis will use the nbdiff tool that comes with nbdime to show an inline diff of the notebook. The -s flag tells nbdiff to only show differences for the actual code changes, ignoring changes in metadata and output. There are a number of flags you can use here to customise the diff. The uppercase version of each flag will ignore the respective change type. For example, to see the diff but ignore changes to the output of cells you can run:\nnbdiff -O Analysis.ipynb\nnbdime also comes with a graphical web-based diff viewer. To try it, run:\nnbdiff-web Analysis.ipynb\nThis will open up a tab in your web browser showing you changes made to the notebook side-by-side for each cell, including also cell output. This makes it easy to see changes made both to code and outputs such as tables and plots.\n\n12.1 Other tools for version control of notebooks\n\nYou can also install the nbdime jupyter lab extension to get access to the diff functionality directly from the Jupyter lab interface. If you also install the jupyterlab-git extension you can both view diffs and commit changes directly from Jupyter lab.\nVS Code actually comes with built-in support for both Jupyter notebooks and Git so that you can view informative diffs directly from the editor" + }, + { + "objectID": "pages/jupyter.html#making-sure-notebooks-work-as-expected", + "href": "pages/jupyter.html#making-sure-notebooks-work-as-expected", + "title": "Working with Jupyter", + "section": "13 Making sure notebooks work as expected", + "text": "13 Making sure notebooks work as expected\nOne of the great things with Jupyter notebooks is the ability to do data exploration in an interactive way. Because loaded data, defined variables and functions remain in the notebook until you restart the kernel, you can easily make changes to your analysis and re-run cells to see the effect of the changes immediately. 
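For example, imagine a (made-up) pair of cells along these lines:\n# Cell 1: define a cutoff\ncutoff = 4000\n# Cell 2: filter the penguins dataframe loaded earlier using the cutoff\npenguins[penguins[\"body_mass_g\"] > cutoff].shape\nChanging the cutoff in the first cell and re-running only the second immediately shows the effect of the change. 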
However, this can also be a source of errors and inconsistencies if you, during your work, modify or use variables in cells upstream of their initial definition.\nThe nbval package can help you catch these types of errors. nbval is a plugin for the pytest testing framework that can be used to test Jupyter notebooks. It works by executing each cell in the notebook and comparing the output to the output stored in the notebook. If the output is the same, the test passes. If the output differs, the test fails. nbval is also pre-installed in the jupyter-env Conda environment you’re using for this tutorial.\nAs an example, we’ll keep working with the Analysis.ipynb notebook we’ve created.\nLet’s say we want to estimate the size of the bill of penguins using the bill_length_mm and bill_depth_mm columns. We’ll do this by adding a new cell to our notebook with the following code:\npenguins[\"bill_size\"] = (penguins[\"bill_length_mm\"] * penguins[\"bill_depth_mm\"])\nRun the cell and add a new one below it. In the new cell, output the mean of each column grouped by island using the following code:\npenguins.groupby(\"island\").mean(numeric_only=True)\nRun the cell to see the output. Looks good. Now we have a very simple example of some exploratory analyses on a dataset.\nSave the notebook and try running nbval on it to see if it works as expected. From the commandline, run:\npytest --nbval Analysis.ipynb\nnbval tests each cell in your notebook by executing it and comparing the output to the output stored in the notebook. If the output is the same, the test passes. The output of the test should look something like this:\ncollected 4 items\n\nAnalysis.ipynb .... [100%]\n\n========== 4 passed in 1.93s ==========\nNow let’s say we realize that we want to normalize the bill_size values by the body mass of the penguins. We’ll just modify the cell where we calculated this value, introducing a small piece of code to divide by the body_mass_g column.\nChange the third cell of the notebook so that it reads:\npenguins[\"bill_size\"] = (penguins[\"bill_length_mm\"] * penguins[\"bill_depth_mm\"]) / penguins[\"body_mass_g\"]\nsns.scatterplot(data=penguins, x=\"bill_size\", y=\"flipper_length_mm\", hue=\"island\")\nRe-run the cell and save the notebook. So far so good! Let’s test the notebook again with nbval. Just like before run it from the commandline with:\npytest --nbval Analysis.ipynb\nIf you’ve followed the instructions, this second run of nbval should generate a FAILED test, showing something like:\n==================== short test summary info ====================\nFAILED Analysis.ipynb::Cell 3\n================== 1 failed, 3 passed in 1.83s ==================\nWhat happened here was that we modified the cell where we calculated the bill_size value, but we didn’t re-run the cell where we output the mean of each column grouped by island. This means that the output of the last cell in the notebook now differs from what is actually stored in the notebook variables. This type of error can be difficult to spot, especially if you have a large notebook with many cells. Luckily, nbval can help us here.\n\n\n\n\n\n\nNote\n\n\n\nNote that nbval reports cell numbers using 0-based numbering, so when the test fails on Cell 3 it actually refers to the 4th cell in the notebook.\n\n\nThis problem could have been solved if we had re-run the cell where we output the mean of each column grouped by island. In fact, it is good practice to re-run all cells in a notebook before saving it. 
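A convenient way to do this is the Run > Run All Cells menu item in the Jupyter lab interface; from the command line, something along the lines of jupyter nbconvert --to notebook --execute --inplace Analysis.ipynb (using the nbconvert tool described later in this tutorial) achieves a similar result. 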
If you in addition restart the kernel before re-running you make sure that you haven’t introduced any ‘hidden states’\n\n\n\n\n\n\nIgnoring specific cells\n\n\n\nOne caveat of nbval is that it doesn’t work well with cells that generate plots. You can tell nbval to ignore the output of specific cells by adding # NBVAL_IGNORE_OUTPUT to the top of a cell.\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned: - How to use nbdime to view diffs of Jupyter notebooks - How to use nbval to test that notebooks work as expected" + }, + { + "objectID": "pages/jupyter.html#converting-notebooks", + "href": "pages/jupyter.html#converting-notebooks", + "title": "Working with Jupyter", + "section": "14 Converting notebooks", + "text": "14 Converting notebooks\nNotebooks can be converted to various output formats such as HTML, PDF, LaTeX etc. directly from the File -> Save and Export Notebook As… menu.\nConversion can also be performed on the command line using the jupyter nbconvert command. nbconvert is installed together with the jupyter Conda package and is executed on the command line by running jupyter nbconvert.\nThe syntax for converting a Jupyter notebook is:\njupyter nbconvert --to <FORMAT> notebook.ipynb\nHere <FORMAT> can be any of asciidoc, custom, html, latex, markdown, notebook, pdf, python, rst, script, slides. Converting to some output formats (e.g. PDF) may require you to install separate software such as Pandoc or a TeX environment.\nTry converting the jupyter-tutorial.ipynb notebook that you have been working on for this tutorial to HTML using jupyter nbconvert.\n\n\n\n\n\n\nTip\n\n\n\nIf the plots in HTML rendered version of your notebook are not displayed properly, try changing the matplotlib_inline.backend_inline.set_matplotlib_formats('pdf', 'svg') line to matplotlib_inline.backend_inline.set_matplotlib_formats('retina').\n\n\nnbconvert can also be used to run a Jupyter notebook from the command line by running:\njupyter nbconvert --execute --to <FORMAT> notebook.ipynb\nnbconvert executes the cells in a notebook, captures the output and saves the results in a new file. Try running it on the jupyter-tutorial.ipynb notebook.\nYou can also specify a different output file with --output <filename>.\nSo in order to execute your jupyter-tutorial.ipynb notebook and save it to a file named report.html you could run:\njupyter nbconvert --to html --output report.html --execute jupyter-tutorial.ipynb\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to convert Jupyter notebooks to various other formats\nHow to use nbconvert to convert notebooks on the command line" + }, + { + "objectID": "pages/jupyter.html#notebooks-and-quarto", + "href": "pages/jupyter.html#notebooks-and-quarto", + "title": "Working with Jupyter", + "section": "15 Notebooks and Quarto", + "text": "15 Notebooks and Quarto\nYou may have noticed that a lot of the functionality in Jupyter is overlapping with Quarto. And you may be wondering which one to use. This is a difficult question to answer as it will depend on your use-case and personal preference. As such, any answer will be subjective, but we’ll try to give you some pointers on how to get the best out of both worlds.\nWhile similar in some ways Jupyter and Quarto are not completely overlapping. Quarto is great for generating high-quality reports and manuscripts, and is agnostic to the programming language used. 
Jupyter on the other hand is great for interactive data analysis and exploration with a more direct connection between code and output. While Jupyter is also somewhat agnostic to programming language, it is most commonly used with Python and with both the Jupyter and Python ecosystem at its back it can be customized with a lot of different extensions and plugins.\nThe good news is that the two can be used together allowing you to get the best of both. For example, you may like the professional look of rendered Quarto documents but really like the interactive and exploratory nature of Jupyter. Well you can simply work as you normally do in Jupyter and then use Quarto to render the notebook to a high-quality report or manuscript.\nTo give you an example, take a look at the supplementary_material.ipynb file in the jupyter/ tutorial directory. Open this notebook in the Jupyter lab interface (make sure you have activated the jupyter-env Conda environment).\nAs you can see this notebook contains some brief descriptions in Markdown and code to generate a few plots. It uses the output from the MRSA case-study Snakemake workflow you worked on in the Snakemake tutorial. This is a common use-case for Jupyter notebooks: to generate summary statistics and plots from the results of a workflow run. (A real-world example could of course include a lot more in-depth exploratory analyses).\nNow, let’s say you want to share the results of this notebook with your PI or collaborators. We could simply share the notebook file, or as we saw in the previous section, convert it to HTML or PDF via jupyter nbconvert.\nLet’s do that first so we have something to compare with. Run the following:\njupyter nbconvert --to HTML --output supplementary_material.nbconvert.html supplementary_material.ipynb\nOpen the supplementary_material.nbconvert.html file in a browser to see that it looks like you expect. This looks more or less like the original notebook.\nNow let’s go one step further and render the notebook to a high-quality report using Quarto. We can actually add a YAML header to the notebook with some document options that Quarto understands. Create a new cell in the notebook (from the Jupyter lab interface) and move it to the top. In this cell, add the following:\n---\ntitle: Supplementary material\nsubtitle: Supplementary tables and plots for the MRSA study\nformat:\n html:\n embed-resources: true\n code-fold: true\n code-tools: true\nlanguage:\n code-summary: Click to show code\nbibliography: references.bib\n---\nSet the cell type to Markdown, then run the cell. Most likely that cell will look rather weird but that’s OK. We’ll fix that in a bit.\nSave the notebook and now render the document with Quarto from the commandline:\nquarto render supplementary_material.ipynb\nOpen up the supplementary_material.html file in a browser and compare it to the supplementary_material.nbconvert.html file. You should see that the Quarto version looks a lot better. The fact that Quarto supports rendering of Jupyter notebooks means you can keep editing your notebooks as you normally would and use Quarto for rendering the final document. Also there’s very little we had to change in the notebook to make it work with Quarto. If you look closely at the code cells used to generate the plots and table you’ll see that they contain code-chunk options in the same form we used in the Quarto tutorial. 
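In a Jupyter notebook such options appear as specially formatted comments at the top of a code cell, for example something like:\n#| label: fig-example\n#| fig-cap: \"An example figure caption\"\n(the exact options used in supplementary_material.ipynb may differ, but the #| syntax is the same). 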
These options do not impact the notebook when run in Jupyter, making it easy to use the two tools in combination.\nLet’s go back to the YAML header cell and fix how it looks in the Jupyter notebook. The reason it looks weird is that Jupyter doesn’t understand the syntax. But luckily there’s a Jupyter lab Quarto extension you can install to fix this. Click the extension icon in the left sidebar and search for quarto. Install the jupyterlab-quarto extension and then reload the page. Now the YAML header should look a lot better.\nTry adding more options to the header to customize the look of the rendered document. For instance you could:\n\nadd a Table of contents with (toc: true)\ntry out different themes\nadd your name as author (author: Your Name)\nadd a date (date: last-modified)\n\nand much more.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use Quarto to render Jupyter notebooks to high-quality reports." + }, + { + "objectID": "pages/jupyter.html#extra-material", + "href": "pages/jupyter.html#extra-material", + "title": "Working with Jupyter", + "section": "16 Extra material", + "text": "16 Extra material\nThe following material contains some additional tips and tricks on how to use Jupyter notebooks. This is not part of the core of the Jupyter material and you can choose what you want to go through, or skip it entirely.\nHere are some useful resources if you want to read more about Jupyter in general:\n\nThe Jupyter project site contains a lot of information and inspiration.\nThe Jupyter Notebook documentation.\nA guide to using widgets for creating interactive notebooks." + }, + { + "objectID": "pages/jupyter.html#running-jupyter-notebooks-on-a-cluster", + "href": "pages/jupyter.html#running-jupyter-notebooks-on-a-cluster", + "title": "Working with Jupyter", + "section": "17 Running Jupyter notebooks on a cluster", + "text": "17 Running Jupyter notebooks on a cluster\n\nLogin to Uppmax, making sure to use a specific login node, e.g. rackham1:\n\nssh <your-user-name>@rackham1.uppmax.uu.se\n\nCreate/activate a Conda environment containing jupyter, e.g.:\n\nconda create -n jupyter -c conda-forge jupyter\n\nactivate the environment, then run:\n\njupyter notebook --no-browser\nWhen the Jupyter server starts up you should see something resembling:\n[I 2023-11-13 22:15:36.944 ServerApp] Serving notebooks from local directory: <path-to-your-directory>\n[I 2023-11-13 22:15:36.944 ServerApp] Jupyter Server 2.10.0 is running at:\n[I 2023-11-13 22:15:36.944 ServerApp] http://localhost:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6\n[I 2023-11-13 22:15:36.944 ServerApp] http://127.0.0.1:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6\n[I 2023-11-13 22:15:36.944 ServerApp] Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).\nNow a Jupyter notebook server is running on the Uppmax end. The line that says:\n[I 2023-11-13 22:15:36.944 ServerApp] http://localhost:8888/tree?token=25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6\nContains information on the port used on the server side (8888 in this case) and the token required to use the server (25fa07e89b7c0bc2e518f259ba79c67847ca813cdf4eeed6).\nNext step is to use this information to login to the server from your local computer.\nOn your local computer\nIn a terminal, run the following command to start port forwarding of port 8080 on your local computer to the remote port on the Uppmax side. 
Replace <remote-port> with the port given when you started the server on Uppmax. Also replace <your-user-name> with your user name on Uppmax.\nssh -N -L localhost:8080:localhost:<remote-port> <your-user-name>@rackham1.uppmax.uu.se\nAs long as this process is running the port forwarding is running. To disable it simply interrupt it with CTRL + C.\nConnect to the Jupyter server by opening localhost:8080 in your browser. When prompted, paste the token you got when starting the server on Uppmax and set a new password." + }, + { + "objectID": "pages/jupyter.html#using-binder-to-share-interactive-notebooks", + "href": "pages/jupyter.html#using-binder-to-share-interactive-notebooks", + "title": "Working with Jupyter", + "section": "18 Using Binder to share interactive notebooks", + "text": "18 Using Binder to share interactive notebooks\nBinder is a service that allows you to share Jupyter notebooks with others, while also allowing them to run the notebooks in the browser. This is great if you wish to share an analysis and have others interact with the code and results, without them having to install anything locally. What you will need is:\n\nA public GitHub repository containing the notebooks you want to share.\nAn environment.yml file in the repository containing the Conda environment required to run the notebooks.\nData files (if any) required to run the notebook(s).\n\nBinder will then create a Docker image containing the Conda environment and the notebooks, and run a Jupyter server on this image. The Docker image is then hosted on the Binder server and can be used by anyone with the link to the repository to run the notebooks interactively in their browser.\nTo show you an example we’ve created a basic GitHub repository containing the supplementary_material.ipynb notebook from the previous section. If you go to the repository you will see a badge saying “launch binder”, click this to start the Binder server. This will take a few minutes the first time you do it, but after that it should be faster. When the server is ready you will be presented with the now familiar Jupyter interface. Go ahead and open up the supplementary_material.ipynb notebook and run it.\nYou can now interact with the notebook as you would if you had it running on a local Jupyter server. You can change the code, run it, and see the results. You can also add new cells and write new code. However, you cannot save the changes you make to the notebook.\nTo read more about Binder and how to use it, see the Binder documentation. For pointers on how to make data available to the notebooks you share via Binder, see this guide on Accessing data in your Binder." + }, + { + "objectID": "pages/nextflow.html", + "href": "pages/nextflow.html", + "title": "Working with Nextflow", + "section": "", + "text": "Nextflow is a workflow management system (WfMS), and is one of the most common such systems within the bioinformatic and academic communities. These systems are important for scientific reproducibility in that they greatly facilitate keeping track of which files have been processed in what way throughout an entire project.\nNextflow is built from the ground-up to be portable, scalable, reproducible and usable in a platform-agnostic sense. This means that any workflow you write in Nextflow can be run locally on your laptop, a computer cluster or a cloud service (as long as your architecture has the necessary computational resources). You can also define the compute environment in which each task is carried out on a per-task basis. 
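As a minimal sketch of what this can look like (not part of the tutorial files, and using a made-up process name and a placeholder container image), a nextflow.config file could select resources and software per process:\nprocess {\n    withName: 'SOME_PROCESS' {\n        cpus = 2\n        container = '<some-container-image>' // placeholder, not a real image\n    }\n}\nThe details of such configuration are beyond this introduction, but it illustrates the per-task flexibility. 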
You might thus develop your workflow on your local computer using a minimal test dataset, but run the full analyses with all samples on e.g. a computer cluster. Nextflow can work on both files and arbitrary values, often-times connected in useful and advanced ways.\nNextflow can easily work with dynamic inputs where the exact output is unknown, e.g. the exact number of files or which samples pass some arbitrary quality control threshold. While Nextflow is based on the Groovy language, you don’t need to know how to code Groovy to be able to write good Nextflow workflows. Nextflow has a large community centred around it, including the nf-core curated collection of high quality pipelines used by e.g. the National Genomics Infrastructure.\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/nextflow and activate your nextflow-env Conda environment." + }, + { + "objectID": "pages/nextflow.html#introduction", + "href": "pages/nextflow.html#introduction", + "title": "Working with Nextflow", + "section": "", + "text": "Nextflow is a workflow management system (WfMS), and is one of the most common such systems within the bioinformatic and academic communities. These systems are important for scientific reproducibility in that they greatly facilitate keeping track of which files have been processed in what way throughout an entire project.\nNextflow is built from the ground-up to be portable, scalable, reproducible and usable in a platform-agnostic sense. This means that any workflow you write in Nextflow can be run locally on your laptop, a computer cluster or a cloud service (as long as your architecture has the necessary computational resources). You can also define the compute environment in which each task is carried out on a per-task basis. You might thus develop your workflow on your local computer using a minimal test dataset, but run the full analyses with all samples on e.g. a computer cluster. Nextflow can work on both files and arbitrary values, often-times connected in useful and advanced ways.\nNextflow can easily work with dynamic inputs where the exact output is unknown, e.g. the exact number of files or which samples pass some arbitrary quality control threshold. While Nextflow is based on the Groovy language, you don’t need to know how to code Groovy to be able to write good Nextflow workflows. Nextflow has a large community centred around it, including the nf-core curated collection of high quality pipelines used by e.g. the National Genomics Infrastructure.\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/nextflow and activate your nextflow-env Conda environment." + }, + { + "objectID": "pages/nextflow.html#the-basics", + "href": "pages/nextflow.html#the-basics", + "title": "Working with Nextflow", + "section": "2 The basics", + "text": "2 The basics\nWe’ll start by creating a very simple workflow from scratch, to show how Nextflow works: it will take two input files and convert them to UPPERCASE letters.\n\nStart by running the following commands:\n\ntouch main.nf\necho \"This is a.txt\" > a.txt\necho \"This is b.txt\" > b.txt\nOpen the main.nf file with an editor of your choice. 
This is the main workflow file used in Nextflow, where workflows and their processes are defined.\n\nCopy the following code into your main.nf file:\n\n// Workflow definition\nworkflow {\n // Define input files\n ch_input = Channel.fromPath( \"a.txt\" )\n\n // Run workflow\n CONVERT_TO_UPPER_CASE( ch_input )\n}\n\n// Process definition\nprocess CONVERT_TO_UPPER_CASE {\n publishDir \"results/\",\n mode: \"copy\"\n\n input:\n path(file)\n\n output:\n path(\"a.upper.txt\")\n\n script:\n \"\"\"\n tr [a-z] [A-Z] < ${file} > a.upper.txt\n \"\"\"\n}\nHere we have two separate parts. The first is the workflow definition, while the last is a process. Let’s go through them both in more detail!\n\n\n\n\n\n\nNextflow comments\n\n\n\nDouble-slashes (//) are used for comments in Nextflow.\n\n\n\n\n\n\n\n\nNextflow and whitespace\n\n\n\nNextflow is not indentation-sensitive. In fact, Nextflow doesn’t care at all about whitespace, so go ahead and use it in whatever manner you think is easiest to read and work with! Do keep in mind that indentations and other types of whitespace does improve readability, so it’s generally not a good idea to forego it entirely, even though you can.\n\n\n\n2.1 Workflow definitions\nworkflow {\n // Define input files\n ch_input = Channel.fromPath( \"a.txt\" )\n\n // Run workflow\n CONVERT_TO_UPPER_CASE( ch_input )\n}\nThe workflow definition here has two parts, each doing an important job for any Nextflow workflow. The first part defines a channel, which is an asynchronous first-in-first-out stream of data that connect a workflow’s various inputs and outputs. In simpler terms, channels contain the data that you want to process with the workflow and can be passed between the various parts of the workflow.\nChannels can be created in various different ways using channel factories, depending on what type data you want to put into them and where this data is stored. In this particular case we define our ch_input channel using the .fromPath channel factory, which takes a file path as input - here we use the a.txt file. You can thus read ch_input = Channel.fromPath(\"a.txt\") as “create the channel ch_input and send the file a.txt into it”.\n\n\n\n\n\n\nNaming channels\n\n\n\nA channel can be named anything you like, but it is good practice to prepend them with ch_, as that makes it clear which variables are channels and which are just normal variables.\n\n\nHow do we use these channels then? Channels pass data to and from processes through our workflow. By providing channels as arguments to processes, we describe how we want data to flow. This is exactly what we do in the second part: we call our CONVERT_TO_UPPER_CASE process with the ch_input as input argument - this is very similar to functional programming.\nThis is our entire workflow, for now: the creation of a channel followed by using the contents of that channel as input to a single process. Let’s look at how processes themselves are defined!\n\n\n2.2 Process definitions\nprocess CONVERT_TO_UPPER_CASE {\n publishDir \"results/\",\n mode: \"copy\"\n\n input:\n path(file)\n\n output:\n path(\"a.upper.txt\")\n\n script:\n \"\"\"\n tr [a-z] [A-Z] < ${file} > a.upper.txt\n \"\"\"\n}\nLooking at the process in the code above, we can see several parts. 
The process block starts with its name, in this case CONVERT_TO_UPPER_CASE, followed by several sections, or directives as Nextflow calls them: publishDir, input, output and script.\n\n\n\n\n\n\nNaming processes\n\n\n\nA process can be named using any case, but a commonly used convention is to use UPPERCASE letters for processes to visually distinguish them in the workflow. You do not have to follow this if you don’t want to, but we do so here.\n\n\nLet’s start with the first directive: publishDir. This tells Nextflow where the output of the process should be placed when it is finished. Setting mode to \"copy\" just means that we want to copy the output files to the publishing directory, rather than using a symbolic link (which is the default).\nThe input and output directives describe the data expected to come through this specific process. Each line of input describes the data expected for each process argument, in the order used in the workflow. In this case, CONVERT_TO_UPPER_CASE expects a single channel (one line of input), and expects the data to be filenames ( i.e. of type path). The script directive is where you put the code that the process should execute.\nNotice that there is a difference between how the inputs and outputs are declared? The output is an explicit string (i.e. surrounded by quotes), while the input is a variable named file. This means inputs can be referenced in the process without naming the data explicitly, unlike the output where the name needs to be explicit. We’ll get back to exactly how this works in just a moment. While the name of the input variable here is chosen to be the descriptive file, we could also have chosen something completely different, e.g. banana (we’d also have to change its reference in the script directive).\n\n\n2.3 Executing workflows\nLet’s try running the workflow we just created!\n\nType the following in your terminal:\n\nnextflow run main.nf\nThis will make Nextflow run the workflow specified in your main.nf file. You should see something along these lines:\nN E X T F L O W ~ version 22.10.6\nLaunching `./main.nf` [mad_legentil] - revision: 87f0c253ed\nexecutor > local (1)\n[32/9124a1] process > CONVERT_TO_UPPER_CASE (1) [100%] 1 of 1 ✔\nThe first few lines are information about this particular run, including the Nextflow version used, which workflow definition file was used, a randomly generated run name (an adjective and a scientist), the revision ID as well as where the processes were executed (locally, in this case, as opposed to e.g. SLURM or AWS).\nWhat follows next is a list of all the various processes for this particular workflow. The order does not necessarily reflect the order of execution (depending on each process’ input and output dependencies), but they are in the order they were defined in the workflow file - there’s only the one process here, of course. The first part (e.g. [32/9124a1]) is the process ID, which is also the first part of the subdirectory in which the process is run (the full subdirectory will be something like 32/9124a1dj56n2346236245i2343, so just a longer hash). We then get the process and its name. Lastly, we get how many instances of each process are currently running or have finished. 
Here we only have the one process, of course, but this will soon change.\n\nLet’s check that everything worked: type ls results/ and see that it contains the output we expected.\nLet’s explore the working directory: change into whatever directory is specified by the process ID (your equivalent to work/32/9124a1[...]).\n\nWhat do you see when you list the contents of this directory? You should see a symbolic link named a.txt pointing to the real location of this file, plus a normal file a.upper.txt, which is the output of the process that was run in this directory. You generally only move into these work directories when debugging errors in your workflow, and Nextflow has some tricks to make this process a lot easier - more on this later.\nSo, in summary: we have three components: a set of inputs stored in a channel, a set of processes and a workflow that defines which processes should be run in what order. We tell Nextflow to push the inputs through the entire workflow, so to speak.\n\nNow it’s your turn! Move back to the workflow root and make it use only the b.txt input file and give you the b.upper.txt instead.\nRun your workflow and make sure it works before you move on; check below if you’re having trouble.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nch_input = Channel.fromPath( \"b.txt\" )\n\n\n\n\n\n2.4 Viewing channel contents\nSomething that’s highly useful during development of Nextflow workflows is to view the contents of channels, which can be done with the view() operator.\n\nAdd the following to your workflow definition (on a new line) and execute the workflow: ch_input.view(). What do you see?\nRemove the view() operator once you’re done.\n\nIt can be quite helpful to view the channel contents whenever you’re unsure of what a channel contains or if you’ve run into some kind of bug or error, or even just when you’re adding something new to your workflow. Remember to view the channel contents whenever you need to during the rest of this tutorial!\n\n\n2.5 Files and sample names\nOne powerful feature of Nextflow is that it can handle complex data structures as input, and not only filenames. One of the more useful things this allows us to do is to couple sample names with their respective data files inside channels.\n\nChange the channel definition to the following:\n\nch_input = Channel\n .fromPath ( \"a.txt\" )\n .map { file -> tuple(file.getBaseName(), file) }\nHere we create a tuple (something containing multiple parts) using the map operator, the base name of the file (a) and the file path (a.txt). The statement .map{ file -> tuple(file.getBaseName(), file) } can thus be read as “replace the channel’s contents with a tuple containing the base name and the file path”. The contents of the channel thus change from [a.txt] to [a, a.txt]. 
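You can confirm this with the view() operator from before: adding ch_input.view() should now print something along the lines of [a, /full/path/to/a.txt] instead of just the file path (the exact path will differ on your system). 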
Passing the sample name or ID together with the sample data in this way is extremely useful in a workflow context and can greatly simplify downstream processes.\nBefore this will work, however, we have to change the process itself to make use of this new information contained in the ch_input channel.\n\nChange the process definition to the following:\n\nprocess CONVERT_TO_UPPER_CASE {\n publishDir \"results/\",\n mode: \"copy\"\n\n input:\n tuple val(sample), path(file)\n\n output:\n path(\"${sample}.upper.txt\")\n\n script:\n \"\"\"\n tr [a-z] [A-Z] < ${file} > ${sample}.upper.txt\n \"\"\"\n}\nNotice how the input now is aware that we’re passing a tuple as input, which allows us to use both the file variable (as before) and the new sample variable. All that’s left now is to change the input to our pipeline!\n\nChange the channel definition line from .fromPath ( \"a.txt\" ) to .fromPath ( [\"a.txt\", \"b.txt\"] ) and try running the pipeline. Make sure it works before you move on! Remember to use the view() operator if you want to inspect the channel contents in detail.\n\n\n\n2.6 Input from samplesheets\nSo far we’ve been specifying inputs using strings inside the workflow itself, but hard-coding inputs like this is not ideal. A better solution is to use samplesheets instead, e.g. comma- or tab-separated data files; this is standard for many pipelines, including nf-core. Take, for example, the following CSV file:\na,a.txt\nb,b.txt\nThis specifies the samples and their respective files on each row. Using such a file is much more portable, scalable and overall easier to use than simply hard-coding things in the workflow definition itself. We might also include an arbitrary number of additional metadata columns, useful for downstream processing and analyses. Using contents of files as input can be done using the .splitCsv() and .map{} operators, like so:\nch_input = Channel\n .fromPath ( \"first_samplesheet.csv\" )\n .splitCsv ( )\n .map { row -> tuple(row[0], file(row[1])) }\nThe .splitCsv() operator lets the channel know the input is a CSV file, while the .map{} operator makes the CSV content into a tuple from the first and second elements of each row.\n\nChange the input channel definition to the code above and create the first_samplesheet.csv file as shown above.\nAdd the .view() operator somewhere to show the contents of ch_input.\nExecute the pipeline. Do you see what you expect? Remove the .view() operator before moving on.\n\n\n\n\n\n\n\nNote\n\n\n\nWhile we are still hard-coding the name of the samplesheet it is still much better to edit a samplesheet than having to edit the pipeline itself - there are also convenient ways to work around this using parameters, which we’ll talk more about later in this tutorial.\n\n\nWe can also specify a header in our samplesheet like so: .splitCsv(header: true). This will allow us to reference the columns using their names instead of their index, e.g. row.col1 instead of row[0].\n\nAdd an appropriate header to your samplesheet, make sure your workflow can read it and execute. Use .view() to see what’s going on, if needed.\n\n\n\n2.7 Adding more processes\nIt’s time to add more processes to our workflow! We have the two files a.upper.txt and b.upper.txt; the next part of the workflow is a step that concatenates the content of all these UPPERCASE files.\nWe already have a channel containing the two files we need: the output of the CONVERT_TO_UPPER_CASE process called CONVERT_TO_UPPER_CASE.out. 
We can use this output as input to a new process using the syntax: CONVERT_TO_UPPER_CASE.out.collect(). The collect() operator groups all the outputs in the channel into a single data object for the next process. This is a many-to-one type of operation: a stream with several files (many) is merged into a lone list of files (one). If collect() was not used, the next process would try to run a task for each file in the output channel.\nLet’s put this in use by adding a new process to the workflow definition. We’ll call this process CONCATENATE_FILES and it will take the output from CONVERT_TO_UPPER_CASE as input, grouped using the collect() operator.\n\nAdd a line to your workflow definition for this new process with the appropriate input - remember that you can use .view() to check channel contents; click below if you’re having trouble.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nCONCATENATE_FILES( CONVERT_TO_UPPER_CASE.out.collect() )\n\n\n\nNow all we have to do is define the actual CONCATENATE_FILES process in the process definition section.\n\nCopy the following code as a new process into your workflow:\n\nprocess CONCATENATE_FILES {\n publishDir \"results/\",\n mode: \"copy\"\n\n input:\n path(files)\n\n output:\n path(\"*.txt\")\n\n script:\n \"\"\"\n cat ${files} > concat.txt\n \"\"\"\n}\n\nRun your workflow again and check the results/ directory. At this point you should have three files there: a.upper.txt, b.upper.txt and concat.txt.\nInspect the contents of concat.txt - do you see everything as you expected?\n\nNote the use of path(files) as input. Although we pass a list of files as input, the list is considered a single object, and so the files variable references a list. Each file in that list can be individually accessed using an index e.g. ${files[0]}, or as we do here, use the variable without an index to list all the input files.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learnt:\n\nHow to create, execute and extend workflows\nHow to explore the work directory and channel contents\nHow to couple sample names to sample data files\nHow to use samplesheets as input\nHow to collect multiple files as single inputs for processes" + }, + { + "objectID": "pages/nextflow.html#executing-workflows-1", + "href": "pages/nextflow.html#executing-workflows-1", + "title": "Working with Nextflow", + "section": "3 Executing workflows", + "text": "3 Executing workflows\nIt’s time to start working with a more realistic workflow using the MRSA case study of this course! We’ve created a bare-bones version of this pipeline for you, but we’ll work our way through it as we go along and learn more about Nextflow’s features and functionality. The MRSA workflow looks like this:\nworkflow {\n\n // Workflow for generating count data for the MRSA case study\n\n // Get input files from a samplesheet\n ch_input = Channel\n .fromPath ( \"samplesheet.csv\" )\n .splitCsv ( header: true)\n\n // Define the workflow\n DOWNLOAD_FASTQ_FILES (\n ch_input\n )\n RUN_FASTQC (\n DOWNLOAD_FASTQ_FILES.out\n )\n RUN_MULTIQC (\n RUN_FASTQC.out[1].collect()\n )\n GET_GENOME_FASTA ()\n INDEX_GENOME (\n GET_GENOME_FASTA.out.fasta\n )\n ALIGN_TO_GENOME (\n DOWNLOAD_FASTQ_FILES.out,\n INDEX_GENOME.out.index\n )\n SORT_BAM (\n ALIGN_TO_GENOME.out.bam\n )\n GET_GENOME_GFF3 ()\n GENERATE_COUNTS_TABLE (\n SORT_BAM.out.bam.collect(),\n GET_GENOME_GFF3.out.gff\n )\n}\nThe workflow has one input channel named ch_input, which reads input from the samplesheet.csv file. 
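Because the samplesheet is read with header: true, each element emitted by ch_input is a map of column names to values - something along the lines of [sra_id:SRR935092, figshare_link:<some-link>], assuming those are the column names used in samplesheet.csv (check the file in the course materials for the exact contents). 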
We then define the processes to be executed by this workflow, nine in total. The first process (DOWNLOAD_FASTQ_FILES) takes the ch_input channel as input, while the rest of the processes take the output of previous processes as input. Before we go into more detail regarding the ins-and-outs of this workflow, let’s start with some specifics of how workflows are executed and what you can get from them.\n\n3.1 Reports and visualisations\nLet’s start with running the workflow plus getting some reports and visualisations while we’re at it!\n\nRun the workflow using the following command: nextflow run main_mrsa.nf -with-report report.html -with-timeline timeline.html -with-dag dag.png.\n\nAfter successful execution, you will find three more files in your current directory: report.html, timeline.html and dag.png. The first file contains a workflow report, which includes various information regarding execution such as runtime, resource usage and details about the different processes. The second file contains a timeline for how long each individual process took to execute, while the last contains a visualisation of the workflow itself.\nTake a few minutes to browse these files for yourself. When running a workflow you can of course choose which of these additional files you want to include by picking which ones are important or interesting to you - or don’t include any!\n\n3.2 Logs\nNextflow keeps a log of all the workflows that have been executed. Let’s check it out!\n\nType nextflow log to get a list of all the executions.\n\nHere we get information about when the workflow was executed, how long it ran, its run name, whether it succeeded or not and what command was used to run it. You can also use nextflow log <run name> to show the directory of each task that was executed for that run. You can also supply the -f (or -fields) flag along with additional fields to show.\n\nRun nextflow log <run name> -f hash,name,exit,status\n\nThis shows us not only the beginning of each task’s working directory, but also its name, exit code and status (i.e. if it completed successfully or failed in some manner).\n\n\n\n\n\n\nListing fields\n\n\n\nIf you want to see a complete list of all the fields you might explore using the log, just type nextflow log -l or nextflow log -list-fields. This is highly useful for debugging when there’s some specific information about a run you’re particularly interested in!\n\n\nWe can also get even more detailed information about the latest run by looking into the .nextflow.log file!\n\nLook into the latest log by typing less .nextflow.log.\n\nYou’ll be greeted by a wealth of debugging information, which may even seem a bit overkill at this point! This level of detail is, however, quite useful both as a history of what you’ve attempted and as an additional help when you run into errors! Also, it helps with advanced debugging - which we’ll get into later.\n\n3.3 Re-running workflows\nSomething you often want to do in Nextflow (or any WfMS for that matter) is to re-run the workflow when you changed some input files or some of the code for its analyses, but you don’t want to re-run the entire workflow from start to finish. Let’s find out how this works in Nextflow!\n\nRun the same nextflow run main_mrsa.nf command again.\n\nYou’ll notice that Nextflow actually re-ran the entire workflow from scratch, even though we didn’t change anything. 
This is the default behaviour of Nextflow.\n\nLet’s try that again: nextflow run main_mrsa.nf -resume instead.\n\nNow you can see that Nextflow didn’t actually re-run anything. The -resume flag instructed Nextflow to use the cached results from the previous run!\nNextflow automatically keeps track of not only changes to input files, but also changes to code, process definitions and scripts. You can thus change anything relating to your workflow and just re-run with the -resume flag and be sure that only processes relevant to your changes are executed again!\n\nUse tree work/ to list the contents of the work directory.\n\nBecause Nextflow keeps track of all the runs, we’ve now got two sets of files in the work directory. One set from the first run, and another from the second run. This can take up valuable space, so let’s clean that up.\n\nUse nextflow clean -n -before <run_name> to show which work directories will be cleaned up (use nextflow log to find the run name if you don’t remember it). Then delete those directories by changing -n (dry-run) to -f (force).\n\nNextflow’s clean subcommand can be used to clean up failed tasks and unused processes. Here we used the -before flag, meaning that any runs before the specified run are removed; use nextflow help clean to see other options for cleaning. This is the preferred way to clean up the working directory.\n\nRemove the results directory and re-run the workflow again using the -resume flag.\n\nWe removed all the results we used before, but we still managed to resume the workflow and use its cache - how come? Remember that Nextflow uses the work directory to run all of its tasks, while the results directory is just where we have chosen to publish our outputs. We can thus delete the results directory as often as we like (a necessity when output filenames are changed) and still get everything back without having to re-run anything. If we were to delete the work directory, however…\n\nDelete the work directory and re-run the workflow using the -resume flag.\n\nThere is no longer any cache for Nextflow to use, so it re-runs from the start! This is good to keep in mind: you can always delete the output directories of your workflow, but if you mess with work you’ll lose, well… work!\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learnt:\n\nHow to get automatic reports and visualisations\nHow to check the Nextflow logs\nHow to re-run workflows\nHow to clean the Nextflow cache" + }, + { + "objectID": "pages/nextflow.html#working-with-processes", + "href": "pages/nextflow.html#working-with-processes", + "title": "Working with Nextflow", + "section": "4 Working with processes", + "text": "4 Working with processes\nNow that we’ve gone through the specifics of executing workflows in a bit more detail, let’s go through working with processes. 
While there are numerous process directives that can be used, we’ll go through some of the more commonly used ones here.\n\n4.1 Tags\nLet’s look at the command line output we got during the workflow’s execution, which should look something like this:\nN E X T F L O W ~ version 22.10.6\nLaunching `./main.nf` [friendly_bhaskara] - revision: b4490b9201\nexecutor > local (17)\n[c9/e5f818] process > DOWNLOAD_FASTQ_FILES (SRR935092) [100%] 3 of 3 ✔\n[d5/b5f24e] process > RUN_FASTQC (SRR935092) [100%] 3 of 3 ✔\n[91/2cea54] process > RUN_MULTIQC [100%] 1 of 1 ✔\n[e0/b4fd37] process > GET_GENOME_FASTA [100%] 1 of 1 ✔\n[87/32ce10] process > INDEX_GENOME [100%] 1 of 1 ✔\n[56/e9a460] process > ALIGN_TO_GENOME (SRR935092) [100%] 3 of 3 ✔\n[ed/d8c223] process > SORT_BAM (SRR935092) [100%] 3 of 3 ✔\n[e7/4a6bda] process > GET_GENOME_GFF3 [100%] 1 of 1 ✔\n[e9/84f093] process > GENERATE_COUNTS_TABLE [100%] 1 of 1 ✔\nHave you noticed that there are SRA IDs after some of the processes? Well, if you look at which processes show these SRA IDs you might see that it’s only those processes that are executed three times, i.e. once per SRA ID. This doesn’t happen automatically, however, and comes from something called tags. Let’s look at the DOWNLOAD_FASTQ_FILES process:\nprocess DOWNLOAD_FASTQ_FILES {\n\n // Download a single-read FASTQ file from the SciLifeLab Figshare remote\n\n tag \"${sra_id}\"\n publishDir \"results/data\",\n mode: \"copy\"\n\n input:\n tuple val(sra_id), val(figshare_link)\n\n output:\n tuple val(sra_id), path(\"*.fastq.gz\")\n\n script:\n \"\"\"\n wget ${figshare_link} -O ${sra_id}.fastq.gz\n \"\"\"\n}\nYou can see the tag directive at the very top of the process definition. Tags can be used to e.g. show information about the sample currently being analysed by the process. This is useful both during run-time (allowing you to see which sample is being processed) and for debugging or finding problematic samples in case of errors or odd output. There is, naturally, no need to use tags for processes which are only run once.\n\nComment out (prefix with //) the tag directive from the DOWNLOAD_FASTQ_FILES process and run the workflow again. What do you see?\n\nWithout the tag directive you should instead see the numbers 1 through 3, representing the input files (of which there are three). Nextflow still tells us that it’s working on one of the input files, but it’s generally much more useful to actually see the sample name or ID, rather than just a number.\n\nUncomment the tag directive before you move on.\n\n\n\n4.2 Named outputs\nLet’s move on to the next process! It looks like this:\nprocess RUN_FASTQC {\n\n // Run FastQC on a FASTQ file.\n\n tag \"${sample}\"\n publishDir \"results/\",\n mode: \"copy\"\n\n input:\n tuple val(sample), path(fastq)\n\n output:\n path(\"*.html\")\n path(\"*.zip\")\n\n script:\n \"\"\"\n fastqc ${fastq} -q\n \"\"\"\n}\nHere is a process with two output channels! One contains all the .html files, while the other contains all the .zip files. How is this handled in the workflow definition of downstream processes that use the outputs? 
The RUN_MULTIQC process uses this output, and its part in the workflow definition looks like this:\nRUN_MULTIQC (\n RUN_FASTQC.out[1].collect()\n)\nWe already know about .out and .collect(), but we have something new here: the RUN_MULTIQC process is taking the second channel of the output from the RUN_FASTQC process - [1] is the index for the second channel, as Groovy is zero-based (the first channel is indexed by [0]).\nThis comes with some issues, however. What if we accidentally changed the order of the outputs in the process, or added a new one? Using positions like this is easy to mess up, but there is a better solution: named outputs! This can be achieved by adding the emit option for some or all of the outputs, like so:\noutput:\npath(\"*.txt\"), emit: text\nInstead of referring to the output by its position in an array as before, we refer to the channel with a label of our choosing (.out.text). This benefits us in two ways: a channel named text tells us a lot more about its contents than [1] does, and named outputs also make us less error-prone when rewriting parts of a workflow.\n\nYour turn! Add named outputs to the RUN_FASTQC process and make RUN_MULTIQC use those outputs. You’ll have to change both the output section of the RUN_FASTQC process, and the workflow definition section for RUN_MULTIQC. If you need help, see the hint below.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// Workflow definition for RUN_MULTIQC\nRUN_MULTIQC (\n RUN_FASTQC.out.zip.collect()\n)\n\n// Output section of RUN_FASTQC\noutput:\npath(\"*.html\"), emit: html\npath(\"*.zip\"), emit: zip\n\n\n\nCheck if it works by executing the workflow.\n\n\n4.3 Advanced publishing\nSo far we’ve only used the publishDir directive in a very simple way: specifying a directory and the mode to use when publishing (to copy the files rather than symbolically link them). There are more things you can do, however, especially for processes with more than one output. For example, we can publish outputs in separate directories, like so:\npublishDir \"results/tables\",\n pattern: \"*.tsv\",\n mode: \"copy\"\npublishDir \"results/logs\",\n pattern: \"*.log\",\n mode: \"copy\"\nIn this example, *.tsv files are copied to the folder results/tables/, while *.log files are copied to the folder results/logs/. The publishDir directive can be used multiple times in a single process, allowing one to separate output as above, or publish the same output to multiple folders.\n\nEdit the RUN_FASTQC process to place the HTML and compressed files in separate directories. Remove the results directory and re-run the workflow to check that it worked - click below if you’re having trouble.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nprocess RUN_FASTQC {\n\n (...)\n\n publishDir \"results/fastqc/html\",\n pattern: \"*.html\",\n mode: \"copy\"\n publishDir \"results/fastqc/zip\",\n pattern: \"*.zip\",\n mode: \"copy\"\n\n (...)\n}\n\n\n\n\nNote that an output and a published output are different things: something can be an output of a process without being published. In fact, the RUN_FASTQC process is a prime example of this! Think about the compressed output: this output is only used by the downstream process RUN_MULTIQC and is never meant to be viewed by a human or used in some downstream task that is not part of the pipeline itself. We would thus like to keep the compressed files as an output, but not publish said output. How do we do this? 
Just remove the corresponding publishDir directive!\nThe MRSA workflow we’ve made here was refactored directly from its original version in the Snakemake tutorial of this course, which means that its output structure is not fully taking advantage of some of Nextflow’s functionality. The compressed output we’ve already talked about above is one example.\n\nSee if you can find any other processes in the current implementation of the MRSA workflow that you could optimise like this!\n\nThink about whether all processes actually need to have published outputs. Make sure you test executing the workflow after you’ve made any changes; click below if you want a hint.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nThe GET_GENOME_FASTA and GET_GENOME_GFF3 processes both download reference files which are only needed by the workflow itself and do not need to be published; the same goes for the genome index generated by the INDEX_GENOME process.\nOne could argue that neither of the BAM files generated by the ALIGN_TO_GENOME and SORT_BAM processes are needed by the user if only the final counts table is of interest, but BAM files can also be useful for exploring the alignments in e.g. IGV. There is, however, no need to publish both BAM files: only the sorted one should be published if one is interested in BAM files.\n\n\n\n\n\n4.4 Debugging\nIt is, sadly, inevitable that we all make mistakes while coding - nobody’s perfect! Nextflow helps you quite a bit when this happens, not just with its logs but also with informative error messages. Let’s introduce an error and look at what we get:\n\nChange the final output line in the RUN_MULTIQC process to the following and re-run the workflow: path(\"multiqc_general_stats.csv\") - notice the usage of .csv rather than .txt as before.\n\nWe got an error! We get a number of things, actually, including (in order from the top) the name of the process that gave the error, the likely cause, the command that was executed, along with its exit status, output, error and the work directory that the task was run in. Let’s focus on the Caused by: part at the top, which should look something like this:\nCaused by:\n Missing output file(s) `multiqc_general_stats.csv` expected by process `RUN_MULTIQC`\nWe can also see that the command’s exit status is 0, which means that the command was successful; any exit status other than 0 means there was an error of some kind. We can thus infer that the command (1) worked, but (2) failed to give us the output expected by Nextflow. Thankfully, Nextflow graciously prints the work directory for us so that we may check out what happened in more detail.\n\nCopy the working directory path, cd into it and list its contents using ls.\n\nYou might already have spotted the error in the message above; the error we introduced here was that the expected output file has a .csv extension, rather than the correct .txt. Nextflow is expecting the .csv output, but the process script directive is (correctly) giving us the .txt file, which we can see inside the process’ work directory.\n\nGo back to the root directory, revert the error you introduced and re-run the workflow to make sure it works again.\n\nThis might have seemed like a trivial error, but a lot of errors in Nextflow can be solved in the same manner, i.e. by just following the debugging output reported by Nextflow and inspecting the specific subdirectory in question.\n\n\n\n\n\n\nA note about Bash\n\n\n\nIf you are using Bash variables inside the script directive you have to be careful to prepend them with a backslash, e.g. 
\\${BASH_VARIABLE}. This is because the dollar-sign is used by Nextflow, so you have to tell Nextflow explicitly when you’re using a Bash variable. This is a common source of errors when using Bash variables, so keeping it in mind can save you some debugging time!\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learnt:\n\nHow to use the tag directive\nHow to use named output with emit\nHow to publish outputs into different directories\nHow to debug errors and mistakes"
  },
  {
    "objectID": "pages/nextflow.html#workflow-configuration",
    "href": "pages/nextflow.html#workflow-configuration",
    "title": "Working with Nextflow",
    "section": "5 Workflow configuration",
    "text": "5 Workflow configuration\nWe’ve so far been working with a relatively non-generalised workflow: it’s got hard-coded inputs, paths and genome references. This is perfectly fine for a project that is purely aimed at getting reproducible results (which is the full extent of what you want in a lot of cases), but it can be made a lot more generalisable. Let’s go through the MRSA workflow and see what can be improved!\n\n5.1 Parameters\nOne of the things that allow generalisability of Nextflow workflows is parameters, which hold information and values that can be changed directly on the command-line at the time of execution. One use of parameters in our MRSA workflow is to remove the hard-coded results output directory, for example. Parameters can be written in the following form:\nparams {\n parameter_1 = \"some/data/path\" // A string parameter\n parameter_2 = 42 // A value parameter\n parameter_3 = [\"a\", \"b\", \"c\", \"d\"] // A list parameter\n}\nYou would then refer to these parameters using e.g. params.parameter_1 anywhere you need to in the workflow. Although parameters can be defined in main_mrsa.nf, it is preferable to define them in a separate configuration file. The default name of this file is nextflow.config and if such a file is present it will be used automatically by Nextflow (to supply a config file with another name use nextflow -c <path-to-config-file> run main_mrsa.nf).\n\nCreate a configuration file and add a parameter for the results output directory.\nUse your newly created parameter in the publishDir directive of a process.\nRun your workflow to see if it worked; click below if you need help.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// Configuration file\nparams {\n outdir = \"results\"\n}\n\n// A publishDir directive in a process\npublishDir \"${params.outdir}\",\n mode: \"copy\"\n\n\n\n\n\n5.2 Command line parameters\nWorkflow parameters can be assigned on the command-line by executing workflows like so: nextflow run main_mrsa.nf --parameter_name 'some_value'. The workflow parameter parameter_name is prefixed by a double dash -- to tell Nextflow this is a parameter to the workflow (a single dash is a parameter to Nextflow, e.g. -resume). The value is also quoted (this is important for parameters that take file paths as values).\n\nRun your workflow using the parameter you previously created, but pick something other than the default value!\n\nYou should now have a new directory containing all the results! This is highly useful if you want to keep track of separate runs of a workflow with different software parameters, for example: nextflow run main.nf --important_param 'value1' --resultsdir 'results-value1', or if you simply want to keep the results of separate versions of the same workflow. 
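As a minimal sketch (assuming you named your parameter outdir, as in the hint above), two runs could then be kept side by side like so:\nnextflow run main_mrsa.nf --outdir 'results-run1'\nnextflow run main_mrsa.nf --outdir 'results-run2'\n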
You can also change parameters by using the -params-file option or by using another configuration file (and using -c), rather than on the command line!\n\n\n5.3 Configuring inputs\nRemember the input for the MRSA workflow, the ch_input channel? This input (the samplesheet.csv file) is hard-coded inside the main_mrsa.nf file. This could also be made into a parameter!\n\nChange the definition of the ch_input channel to take the value of a new parameter of your choice, defined in the configuration file.\n\nYou should now have a more generalised input to your workflow! Try to run it to make sure it works - look below if you need some help.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// Channel definition\nch_input = Channel\n .fromPath ( params.input )\n .splitCsv ( header: true )\n\n// Configuration file\ninput = \"samplesheet.csv\"\n\n\n\nBy specifying inputs from sample sheets like this we can change inputs of a workflow execution by creating another sample sheet and specifying e.g., --input samplesheet-2.csv on the command line. This is highly useful when you want to run a single sample e.g., when testing a workflow, or when you want to keep track of all the different inputs you’ve used historically.\n\n\n5.4 Other configuration scopes\nThere are lots of things that you might want to add to your configuration, not just parameters! The workflow manifest, for example, which might look like this:\nmanifest {\n name = \"My Workflow\"\n description = \"My awesome workflow, created by me\"\n author = \"Me\"\n mainScript = \"main.nf\"\n version = \"1.0.0\"\n}\n\nGo ahead and add a workflow manifest to your nextflow.config file!\n\nThe manifest is useful when you’re publishing or sharing the workflow through e.g. GitHub or similar. There are many more such configuration scopes that you might want to use - read more about them in the documentation.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we learnt:\n\nHow to create parameters in a configuration file\nHow to specify parameters on the command line\nHow to add workflow manifest and other configuration scopes" + }, + { + "objectID": "pages/nextflow.html#optimising-the-mrsa-workflow", + "href": "pages/nextflow.html#optimising-the-mrsa-workflow", + "title": "Working with Nextflow", + "section": "6 Optimising the MRSA workflow", + "text": "6 Optimising the MRSA workflow\nWe just added several parameters and configurations to our MRSA workflow, but we didn’t do anything about the reference genomes: those are still hard-coded. The current MRSA workflow is, in fact, not very well-optimised for Nextflow at all, being a refactor from the Snakemake tutorial of this course.\nAll of the processes are basically unchanged, excluding some minor alterations. For example, the run_fastqc rule in Snakemake used the -o flag to specify that the results should be in the current directory, followed by moving the output files to their respective output directory. The first part is not needed in Nextflow (as everything is run in its own subdirectory), and the second part is done by the publishDir directive. These are just minor alterations, though, but we can do much more if we fully utilise Nextflow’s features!\n\n6.1 Remote files\nOne of these features is the ability to automatically download remote files, without needing to explicitly do so! The path input type can handle either file paths (like we’ve done so far) or a URI-supported protocol (such as http://, s3://, ftp://, etc.). This would be highly useful for e.g. 
the GET_GENOME_FASTA process - in fact, we don’t even need that process at all! All we need to do is to change the input to the INDEX_GENOME and ALIGN_TO_GENOME processes.\n\nCreate a new input channel using the fromPath() channel factory and the absolute path (the FTP address) to the genome FASTA.\nMake the INDEX_GENOME process use that input channel instead of the previously used output of the GET_GENOME_FASTA process.\nRemove the GET_GENOME_FASTA process, as it is not needed anymore.\n\nRe-run the workflow to see if it worked. Check the code below for an example if you’re stuck:\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// Channel creation\nch_genome_fasta = Channel.fromPath( \"ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz\" )\n\n// Workflow definition\nINDEX_GENOME (\n ch_genome_fasta\n)\n\n\n\nWe could also do this using parameters from our configfile, of course!\n\nNow change the input to the GENERATE_COUNTS_TABLE to use the remote GFF3 file and remove the GET_GENOME_GFF3 in the same manner as above, but using a new parameter instead.\n\nRe-run the workflow again to make sure it worked; check below if you’re stuck.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// [ nextflow.config ]\nparams {\n genome_gff3 = \"ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.37.gff3.gz\"\n}\n\n// [ main.nf ]\n// Channel creation\nch_genome_ggf3 = Channel.fromPath ( params.genome_gff3 )\n\n// Workflow definition\nGENERATE_COUNTS_TABLE (\n SORT_BAM.out.bam.collect(),\n ch_genome_ggf3\n)\n\n\n\nIf we want to get detailed we can also change the hard-coded “NCT8325” naming in e.g. the INDEX_GENOME process and put that in another parameter, or grab the baseName() from the channel and make a [prefix, file] tuple using the map{} operator like we did previously; check below if you’re curious of how this could be done.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// Channel definition\nch_genome_fasta = Channel\n .fromPath( \"ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna/Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz\" )\n .map { file -> tuple(file.getBaseName(), file) }\n\n// INDEX_GENOME process definition\nprocess INDEX_GENOME {\n\n publishDir \"results/bowtie2/\",\n mode: \"copy\"\n\n input:\n tuple val(fasta_name), path(fasta)\n\n output:\n path(\"*.b2t\"), emit: index\n\n script:\n \"\"\"\n # Bowtie2 cannot use .gz, so unzip to a temporary file first\n gunzip -c ${fasta} > tempfile\n bowtie2-build tempfile ${fasta_name}\n \"\"\"\n}\n\n\n\n\n\n6.2 Subworkflows\nThe DSL2 allows highly modular workflow design, where a workflow may contain multiple subworkflows. A subworkflow is just like a normal workflow, but it can be called inside other workflows, similar to a process. There is thus no special difference between a subworkflow and a workflow; the only difference is how you use them in practice. 
Let’s take a look at a toy example:\nworkflow {\n ch_input = Channel.fromPath ( params.input )\n SUBWORKFLOW (\n ch_input\n )\n}\n\nworkflow SUBWORKFLOW {\n\n take:\n input_file\n\n main:\n ALIGN_READS( input_file )\n\n emit:\n bam = ALIGN_READS.out.bam\n}\nHere we have an unnamed, main workflow like before, plus a named subworkflow. A workflow can have inputs specified by the take directive, which is the equivalent of process input for workflows. The main part is the workflow body, which contains how to run which processes in which order. The last part, emit, also works the same as for processes, in that we name the different outputs of the workflow so that we may use them in other workflows or processes. Nextflow will run the unnamed workflow by default, unless the -entry flag is specified, like so:\nnextflow run main.nf -entry SUBWORKFLOW\nThis will run the workflow named SUBWORKFLOW, but nothing else. You can also store subworkflows in separate files, so that everything doesn’t have to be crammed into a single main.nf file. A subworkflow named SUBWORKFLOW contained in the file subworkflow.nf can be loaded into a main.nf file like so:\ninclude { SUBWORKFLOW } from \"./subworkflow.nf\"\nIf you have a complex workflow with several subworkflows you might thus store them in a separate directory, e.g. subworkflows/. This allows you to have fine-grained control over the general architecture of your Nextflow workflows, organising them in a manner that is easy to code and maintain. A process can also be treated in the same manner, and defined separately in another file.\n\nNow it’s your turn! Separate the RUN_FASTQC and RUN_MULTIQC processes out of the main workflow and into a subworkflow. Check below if you’re having trouble.\n\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// [ main.nf ]\n// Include subworkflow\ninclude { QUALITY_CONTROLS } from \"./subworkflows/quality_controls.nf\"\n\n// Main workflow\nQUALITY_CONTROLS (\n DOWNLOAD_FASTQ_FILES.out\n)\n\n// [ subworkflows/quality_controls.nf ]\n// Quality controls subworkflow\nworkflow QUALITY_CONTROLS {\n\n take:\n fastq\n\n main:\n RUN_FASTQC (\n fastq\n )\n RUN_MULTIQC (\n RUN_FASTQC.out.zip.collect()\n )\n\n emit:\n html = RUN_MULTIQC.out.html\n general_stats = RUN_MULTIQC.out.general_stats\n}\n\n// [ Include RUN_FASTQC and RUN_MULTIQC processes here ]\n\n\n\nIf you want to challenge yourself, try to do the same with the INDEX_GENOME, ALIGN_TO_GENOME and SORT_BAM processes! 
Be careful of where you get your inputs and outputs; check below if you want one of the ways in which you can do this:\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n// [ main.nf ]\n// Include subworkflow\ninclude { ALIGNMENT } from \"./subworkflows/alignment.nf\"\n\n// Main workflow\nALIGNMENT (\n ch_genome_fasta,\n DOWNLOAD_FASTQ_FILES.out\n)\n\n// [ subworkflows/alignment.nf ]\n// Alignment subworkflow\nworkflow ALIGNMENT {\n\n take:\n fasta\n fastq\n\n main:\n INDEX_GENOME (\n fasta\n )\n ALIGN_TO_GENOME (\n fastq,\n INDEX_GENOME.out.index\n )\n SORT_BAM (\n ALIGN_TO_GENOME.out.bam\n )\n\n emit:\n bam = SORT_BAM.out.bam\n}\n\n// [ Include INDEX_GENOME, ALIGN_TO_GENOME and SORT_BAM processes here ]\n\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we learnt:\n\nHow to automatically download remote files\nHow to create and work with subworkflows"
  },
  {
    "objectID": "pages/nextflow.html#extra-material",
    "href": "pages/nextflow.html#extra-material",
    "title": "Working with Nextflow",
    "section": "7 Extra material",
    "text": "7 Extra material\nThere are many more things you can do with Nextflow than covered here. If you are interested in learning more about Nextflow, we will briefly show some of its advanced features in this section. But first, here are some links to additional resources on Nextflow:\n\nNextflow patterns that can help with common operations and concepts\nThe Nextflow documentation\nNextflow training at Seqera\nA work-in-progress Nextflow Carpentry course\nCommunity help from Nextflow’s Slack channel\n\n\n7.1 Using containers in Nextflow\nNextflow has built-in support for using both Docker and Apptainer containers (and others too), either with a single container for the workflow as a whole or separate containers for each individual process. The simplest way to do it is to have a single container for your entire workflow, in which case you simply run the workflow and specify the image you want to use, like so:\n# Run with docker\nnextflow run main.nf -with-docker image-name\n\n# Run with Apptainer\nnextflow run main.nf -with-apptainer image.sif\nIf you don’t want to supply this at every execution, you can also add it directly to your configuration file:\n# Docker configuration\nprocess.container = 'image-name'\ndocker.enabled = true\n\n# Apptainer configuration\nprocess.container = 'path/to/image.sif'\napptainer.enabled = true\nIf you instead would like to have each process use a different container you can use the container directive in your processes:\nprocess PROCESS_01 {\n (...)\n container: 'image_01'\n (...)\n}\n\nprocess PROCESS_02 {\n (...)\n container: 'image_02'\n (...)\n}\nRegardless of which solution you go for, Nextflow will execute all the processes inside the specified container. In practice, this means that Nextflow will automatically wrap your processes and run them by executing the Docker or Apptainer command with the image you have provided.\n\n7.2 Using Conda in Nextflow\nWhile you can execute Nextflow inside Conda environments just like you would any other type of software, you can also use Conda with Nextflow in the same way as for Docker and Apptainer above. 
You can either supply an environment.yml file, the path to an existing environment or the packages and their versions directly in the conda directive, like so:\nprocess PROCESS_01 {\n (...)\n conda: 'mrsa-environment.yml'\n (...)\n}\nprocess PROCESS_02 {\n (...)\n conda: 'path/to/mrsa-env'\n (...)\n}\nprocess PROCESS_03 {\n (...)\n conda: 'bioconda::bwa=0.7.17 bioconda::samtools=1.13'\n (...)\n}\nYou can use either of the methods described above with your configuration file as well, here exemplified using an environment.yml file:\nprocess.conda = 'mrsa-environment.yml'\n\n\n7.3 Running Nextflow on Uppmax\nA lot of researchers in Sweden are using the Uppmax computer cluster in Uppsala, which is easily handled by Nextflow. What you need to do is to add the following profile to your nextflow.config file:\nprofiles {\n // Uppmax general profile\n uppmax {\n params {\n account = null\n }\n process {\n executor = 'slurm'\n clusterOptions = \"-A '${params.account}'\"\n memory = { 6.GB * task.attempt }\n cpus = { 1 * task.attempt }\n time = { 10.h * task.attempt }\n scratch = '$SNIC_TMP'\n errorStrategy = 'retry'\n maxRetries = 1\n }\n }\n}\nThis will add a profile to your workflow, which you can access by running the workflow with -profile uppmax. You will also have to supply an extra parameter account which corresponds to your SNIC project account, but the rest you can leave as-is, unless you want to tinker with e.g. compute resource specifications. That’s all you need! Nextflow will take care of communications with SLURM (the system used by Uppmax, specified by the executor line) and will send off jobs to the cluster for you, and everything will look exactly the same way as if you were executing the pipeline locally.\nThe memory, cpus and time lines define the various resources Nextflow will use as well as how much to automatically increase them by if re-trying failed tasks; this, in turn, is specified by the errorStrategy and maxRetries variables. The scratch variable defines where each node’s local storage is situated, which gives Nextflow the most optimal access to the Uppmax file system for temporary files.\n\n\n7.4 Advanced channel creation\nThe input data shown in the MRSA example workflow is not that complex, but Nextflow channels can do much more than that. A common scenario in high-throughput sequencing is that you have pairs of reads for each sample. Nextflow has a special, built-in way to create channels for this data type: the fromFilePairs channel factory:\nch_raw_reads = Channel\n .fromFilePairs ( \"data/*_R{1,2}.fastq.gz\" )\nThis will create a channel containing all the reads in the data/ directory in the format <sample>_R1.fastq.gz and <sample>_R2.fastq.gz and will pair them together into a nested tuple looking like this:\n[sample, [data/sample_R1.fastq.gz, data/sample_R2.fastq.gz]]\nThe first element of the tuple ([0]) thus contains the value sample, while the second element ([1]) contains another tuple with paths to both read files. This nested tuple can be passed into processes for e.g. read alignment, and it makes the entire procedure of going from read pairs (i.e. two separate files, one sample) into a single alignment file (one file, one sample) very simple. 
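As a minimal sketch of a process that could consume such a channel (the process name and the trimming command here are made up purely for illustration), it might look something like this:\nprocess TRIM_READS {\n\n tag \"${sample}\"\n\n input:\n tuple val(sample), path(reads)\n\n output:\n tuple val(sample), path(\"*.trimmed.fastq.gz\")\n\n script:\n \"\"\"\n # reads[0] is the R1 file and reads[1] the R2 file of the pair\n trimming_tool ${reads[0]} ${reads[1]} -o ${sample}_R1.trimmed.fastq.gz -O ${sample}_R2.trimmed.fastq.gz\n \"\"\"\n}\n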
For more methods of reading in data see the Nextflow documentation on Channel Factories.\nWe can also do quite advanced things to manipulate data in channels, such as this:\nsamples_and_treatments = Channel\n .fromPath ( params.metadata )\n .splitCsv ( sep: \"\\t\", header: true )\n .map { row -> tuple(\"${row.sample_id}\", \"${row.treatment}\") }\n .filter { id, treatment -> treatment != \"DMSO\" }\n .unique ( )\nThat’s a bit of a handful! But what does it do? The first line specifies that we want to read some data from a file specified by the metadata parameter, and the second line actually reads that data using tab as delimiter, including a header. The map operator takes each entire row and subsets it to only two columns: the sample_id and treatment columns (discarding the other columns). This subset is stored as a tuple. The filter operator is then used to remove any tuples where the second entry (treatment) is not equal to the string \"DMSO\" (i.e. untreated cells, in this example). Finally, we only keep unique tuple values. Let’s say that this is the metadata we’re reading:\nsample dose group treatment\nsample_1 0.1 control DMSO\nsample_1 1.0 control DMSO\nsample_1 2.0 control DMSO\nsample_2 0.1 case vorinostat\nsample_2 1.0 case vorinostat\nsample_2 2.0 case vorinostat\nsample_3 0.1 case fulvestrant\nsample_3 1.0 case fulvestrant\nsample_3 2.0 case fulvestrant\nGiven the channel creation strategy above, we would get the following result:\n[sample_2, vorinostat]\n[sample_3, fulvestrant]\nIn this way, you can perform complex operations on input files or input metadata and send the resulting content to your downstream processes in a simple way. Composing data manipulations in Nextflow like this can be half the fun of writing the workflow. Check out Nextflow’s documentation on Channel operators to see the full list of channel operations at your disposal.\n\n\n7.5 Using Groovy in processes\nYou don’t have to use bash or external scripts inside your processes all the time unless you want to: Nextflow is based on Groovy, which allows you to use both Groovy and Bash in the same process. For example, have a look at this:\nprocess index_fasta {\n tag \"${fasta_name}\"\n\n input:\n tuple val(fasta), path(fasta_file)\n\n output:\n path(\"${fasta_name}.idx\"), emit: fasta\n\n script:\n fasta_name = fasta.substring(0, fasta.lastIndexOf(\".\"))\n \"\"\"\n index --ref ${fasta_file},${fasta_name}\n \"\"\"\n}\nHere we have some command index that, for whatever reason, requires both the path to a FASTA file and the name of that file without the .fasta extension. We can use Groovy in the script directive together with normal Bash, mixing and matching as we like. The first line of the script directive gets the name of the FASTA file without the extension by removing anything after the dot, while the second calls the index command like normal using bash.\n\n\n7.6 The nf-core pipeline collection\nYou may have heard of the nf-core pipeline collection previously, which is a large, collaborative bioinformatics community dedicated to building, developing and maintaining Nextflow workflows. In fact, if you have sequenced data at e.g. the National Genomics Infrastructure (NGI), you can be sure that the data processing has been run using one of the nf-core pipelines! 
While the community only started in 2018 (with a Nature Biotechnology paper in 2020), it already has over 30 production-ready pipelines with everything from genomics, transcriptomics, proteomics and metagenomics - and more being developed all the time.\nThe nf-core pipelines all work in the same way, in that they have the same exact base for inputs, parameters and arguments, making them all highly similar to run. Since you’ve already learnt the basics of Nextflow in this course, you should now be able to also run the nf-core pipelines! It might be that you have a data type that you can analyse using one of the pipelines in nf-core, meaning you don’t need to do anything other than find out what parameters you should run it with.\nEach pipeline comes with extensive documentation, test datasets that you can use to practice on, can be run on both HPCs like Uppmax, cloud services like AWS or locally on your own computer. All pipelines support both Conda and Docker/Apptainer, and you can additionally run specific versions of the pipelines, allowing for full reproducibility of your analyses. If you want to check nf-core out, simply head over to their list of pipelines and see what’s available! Who knows, you might even write your own nf-core pipeline in the future?" + }, + { + "objectID": "pages/snakemake.html", + "href": "pages/snakemake.html", + "title": "Working with Snakemake", + "section": "", + "text": "A workflow management system (WfMS) is a piece of software that sets up, performs and monitors a defined sequence of computational tasks (i.e. “a workflow”). Snakemake is a WfMS that was developed in the bioinformatics community, and as such it has a number of features that make it particularly well-suited for creating reproducible and scalable data analyses.\nFirst of all the language you use to formulate your workflows is based on Python, which is a language with strong standing in academia. However, users are not required to know how to code in Python to work efficiently with Snakemake. Workflows can easily be scaled from your desktop to server, cluster, grid or cloud environments. This makes it possible to develop a workflow on your laptop, maybe using only a small subset of your data, and then run the real analysis on a cluster. Snakemake also has several features for defining the environment with which each task is carried out. This is important in bioinformatics, where workflows often involve running a large number of small third-party tools.\nSnakemake is primarily intended to work on files (rather than for example streams, reading/writing from databases or passing variables in memory). This fits well with many fields of bioinformatics, notably next-generation sequencing, that often involve computationally expensive operations on large files. It’s also a good fit for a scientific research setting, where the exact specifications of the final workflow aren’t always known at the beginning of a project.\nLastly, a WfMS is a very important tool for making your analyses reproducible. By keeping track of when each file was generated, and by which operation, it is possible to ensure that there is a consistent “paper trail” from raw data to final results. Snakemake also has features that allow you to package and distribute the workflow, and any files it involves, once it’s done.\nThis tutorial depends on files from the course GitHub repo. 
Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/snakemake and activate your snakemake-env Conda environment." + }, + { + "objectID": "pages/snakemake.html#introduction", + "href": "pages/snakemake.html#introduction", + "title": "Working with Snakemake", + "section": "", + "text": "A workflow management system (WfMS) is a piece of software that sets up, performs and monitors a defined sequence of computational tasks (i.e. “a workflow”). Snakemake is a WfMS that was developed in the bioinformatics community, and as such it has a number of features that make it particularly well-suited for creating reproducible and scalable data analyses.\nFirst of all the language you use to formulate your workflows is based on Python, which is a language with strong standing in academia. However, users are not required to know how to code in Python to work efficiently with Snakemake. Workflows can easily be scaled from your desktop to server, cluster, grid or cloud environments. This makes it possible to develop a workflow on your laptop, maybe using only a small subset of your data, and then run the real analysis on a cluster. Snakemake also has several features for defining the environment with which each task is carried out. This is important in bioinformatics, where workflows often involve running a large number of small third-party tools.\nSnakemake is primarily intended to work on files (rather than for example streams, reading/writing from databases or passing variables in memory). This fits well with many fields of bioinformatics, notably next-generation sequencing, that often involve computationally expensive operations on large files. It’s also a good fit for a scientific research setting, where the exact specifications of the final workflow aren’t always known at the beginning of a project.\nLastly, a WfMS is a very important tool for making your analyses reproducible. By keeping track of when each file was generated, and by which operation, it is possible to ensure that there is a consistent “paper trail” from raw data to final results. Snakemake also has features that allow you to package and distribute the workflow, and any files it involves, once it’s done.\nThis tutorial depends on files from the course GitHub repo. Take a look at the setup for instructions on how to set it up if you haven’t done so already, then open up a terminal and go to workshop-reproducible-research/tutorials/snakemake and activate your snakemake-env Conda environment." + }, + { + "objectID": "pages/snakemake.html#the-basics", + "href": "pages/snakemake.html#the-basics", + "title": "Working with Snakemake", + "section": "2 The basics", + "text": "2 The basics\nIn this part of the tutorial we will create a very simple workflow from scratch, in order to show the fundamentals of how Snakemake works. The workflow will take two files as inputs, a.txt and b.txt, and the purpose is to convert the text in the files to upper case and then to concatenate them.\nRun the following shell commands. The first one will make an empty file named Snakefile, which will later contain the workflow. The second and third commands generate two files containing some arbitrary text.\ntouch Snakefile\necho \"This is a.txt\" > a.txt\necho \"This is b.txt\" > b.txt\nThen open Snakefile in your favourite text editor. 
A Snakemake workflow is based on rules which take some file(s) as input, performs some type of operation on them, and generate some file(s) as outputs. Here is a very simple rule that produces a.upper.txt as an output, using a.txt as input. Copy this rule to your Snakefile and save it.\nrule convert_to_upper_case:\n output:\n \"a.upper.txt\"\n input:\n \"a.txt\"\n shell:\n \"\"\"\n tr [a-z] [A-Z] < {input} > {output}\n \"\"\"\n\n\n\n\n\n\nCaution\n\n\n\nIndentation is important in Snakefiles, so make sure that you have the correct number of spaces before input/output/shell and their respective subsections. The number of spaces per level doesn’t matter as long as you’re consistent. Here we use four, but you could just as well use two for a more compact look. Don’t use tabs (unless your editor automatically converts them to spaces).\n\n\nRules can be given names, here it’s convert_to_upper_case. While rule names are not strictly necessary we encourage you to use them and to make an effort to name your rules in a way that makes it easy to understand the purpose of the rule, as rule names are one of the main ways to interact with the workflow. The shell section (or directive) contains the shell commands that will convert the text in the input file to upper case and send it to the output file. In the shell command string, we can refer to elements of the rule via curly brackets. Here, we refer to the output file by specifying {output} and to the input file by specifying {input}. If you’re not very familiar with Bash, this particular command can be read like “send the contents of a.txt to the program tr, which will convert all characters in the set [a-z] to the corresponding character in the set [A-Z], and then send the output to a.upper.txt”.\nNow let’s run our first Snakemake workflow. When a workflow is executed Snakemake tries to generate a set of target files. Target files can be specified via the command line (or, as you will see later, in several other ways). Here we ask Snakemake to make the file a.upper.txt. We can specify the file containing our rules with -s but since the default behaviour of Snakemake is to look for a file called Snakefile in either the working directory or in a subdirectory called workflow/ we don’t need to specify that here. It’s good practice to first run with the flag -n (or --dry-run), which will show what Snakemake plans to do without actually running anything, and you also need to specify how many cores to be used for the workflow with --cores or -c. For now, you only need 1 so set -c 1. You can also use the flag -p, for showing the shell commands that it will execute, and the flag -r for showing the reason for running a specific rule. snakemake --help will show you all available flags.\n$ snakemake -n -c 1 -r -p a.upper.txt\n\nBuilding DAG of jobs...\nJob stats:\njob count min threads max threads\n--------------------- ------- ------------- -------------\nconvert_to_upper_case 1 1 1\ntotal 1 1 1\n\n\n[Mon Oct 25 16:48:43 2021]\nrule convert_to_upper_case:\n input: a.txt\n output: a.upper.txt\n jobid: 0\n reason: Missing output files: a.upper.txt\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n\n tr [a-z] [A-Z] < a.txt > a.upper.txt\n\nJob stats:\njob count min threads max threads\n--------------------- ------- ------------- -------------\nconvert_to_upper_case 1 1 1\ntotal 1 1 1\n\nThis was a dry-run (flag -n). 
The order of jobs does not reflect the order of execution.\nYou can see that Snakemake plans to run one job: the rule convert_to_upper_case with a.txt as input and a.upper.txt as output. The reason for doing this is that it’s missing the file a.upper.txt. Now execute the workflow without the -n flag and check that the contents of a.upper.txt is as expected. Then try running the same command again. What do you see? It turns out that Snakemake only reruns jobs if there have been changes to either the input files, or the workflow itself. This is how Snakemake ensures that everything in the workflow is up to date. We will get back to this shortly.\nWhat if we ask Snakemake to generate the file b.upper.txt?\n$ snakemake -n -c 1 -r -p b.upper.txt\n\nBuilding DAG of jobs...\nMissingRuleException:\nNo rule to produce b.upper.txt (if you use input functions make sure that they don't raise unexpected exceptions).\nThat didn’t work well. We could copy the rule to make a similar one for b.txt, but that would be a bit cumbersome. Here is where named wildcards come in; one of the most powerful features of Snakemake. Simply change the input from input: \"a.txt\" to input: \"{some_name}.txt\" and the output to output: \"{some_name}.upper.txt\". Now try asking for b.upper.txt again.\nTada! What happens here is that Snakemake looks at all the rules it has available (actually only one in this case) and tries to assign values to all wildcards so that the targeted files can be generated. In this case it was quite simple, you can see that it says that wildcards: some_name=b, but for large workflows and multiple wildcards it can get much more complex. Named wildcards is what enables a workflow (or single rules) to be efficiently generalized and reused between projects or shared between people.\nIt seems we have the first part of our workflow working, now it’s time to make the second rule for concatenating the outputs from convert_to_upper_case. The rule structure will be similar; the only difference is that here we have two inputs instead of one. This can be expressed in two ways, either with named inputs like this:\ninput:\n firstFile=\"...\",\n secondFile=\"...\"\nshell:\n \"\"\"\n some_function {input.firstFile} {input.secondFile}\n \"\"\"\nOr with indexes like this:\ninput:\n \"...\",\n \"...\"\nshell:\n \"\"\"\n some_function {input[0]} {input[1]}\n \"\"\"\n\n\n\n\n\n\nCaution\n\n\n\nIf you have multiple inputs or outputs they need to be delimited with a comma (as seen above). This is a very common mistake when writing Snakemake workflows. The parser will complain, but sometimes the error message can be difficult to interpret.\n\n\nNow try to construct this rule yourself and name it concatenate_a_and_b. The syntax for concatenating two files in Bash is cat first_file.txt second_file.txt > output_file.txt. Call the output c.txt. Run the workflow in Snakemake and validate that the output looks as expected.\nWouldn’t it be nice if our workflow could be used for any files, not just a.txt and b.txt? We can achieve this by using named wildcards (or in other ways as we will discuss later). As we’ve mentioned, Snakemake looks at all the rules it has available and tries to assign values to all wildcards so that the targeted files can be generated. We therefore have to name the output file in a way so that it also contains information about which input files it should be based on. Try to figure out how to do this yourself. If you’re stuck you can look at the spoiler below, but spend some time on it before you look. 
Also rename the rule to concatenate_files to reflect its new more general use.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nrule concatenate_files:\n output:\n \"{first}_{second}.txt\"\n input:\n \"{first}.upper.txt\",\n \"{second}.upper.txt\"\n shell:\n \"\"\"\n cat {input[0]} {input[1]} > {output}\n \"\"\"\n\n\n\nWe can now control which input files to use by the name of the file we ask Snakemake to generate. Run the workflow without the flag -n (or --dry-run) to execute both rules, providing one core with -c 1 (or --cores 1):\n$ snakemake a_b.txt -c 1\n\nBuilding DAG of jobs...\nUsing shell: /bin/bash\nProvided cores: 1 (use --cores to define parallelism)\nRules claiming more threads will be scaled down.\nJob stats:\njob count min threads max threads\n--------------------- ------- ------------- -------------\nconcatenate_files 1 1 1\nconvert_to_upper_case 2 1 1\ntotal 3 1 1\n\nSelect jobs to execute...\n\n[Mon Oct 25 16:51:52 2021]\nrule convert_to_upper_case:\n input: b.txt\n output: b.upper.txt\n jobid: 2\n wildcards: some_name=b\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n[Mon Oct 25 16:51:53 2021]\nFinished job 2.\n1 of 3 steps (33%) done\nSelect jobs to execute...\n\n[Mon Oct 25 16:51:53 2021]\nrule convert_to_upper_case:\n input: a.txt\n output: a.upper.txt\n jobid: 1\n wildcards: some_name=a\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n[Mon Oct 25 16:51:53 2021]\nFinished job 1.\n2 of 3 steps (67%) done\nSelect jobs to execute...\n\n[Mon Oct 25 16:51:53 2021]\nrule concatenate_files:\n input: a.upper.txt, b.upper.txt\n output: a_b.txt\n jobid: 0\n wildcards: first=a, second=b\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n[Mon Oct 25 16:51:53 2021]\nFinished job 0.\n3 of 3 steps (100%) done\nNeat!\n\n\n\n\n\n\nTip\n\n\n\nYou can name a file whatever you want in a Snakemake workflow, but you will find that everything falls into place much nicer if the filename reflects the file’s path through the workflow, e.g. sample_a.trimmed.deduplicated.sorted.bam.\n\n\nThe input to Snakemake rules have to be strings or lists of strings, however you don’t have to specify these strings directly in the input: section of rules. Instead, you can specify Python functions that return strings or lists of strings. This allows you to supply input to rules that can vary depending on the wildcards being used. We’ll get to why that’s useful in a sec, but first let’s put it to use for the conatenate_files rule. Because Snakemake is based on Python we can mix rule definitions with standard python code in the same file. Add a function just above the concatenate_files that looks like this:\ndef concat_input(wildcards):\n files = [wildcards.first + \".upper.txt\", wildcards.second + \".upper.txt\"]\n return files\nThis is the syntax to define a function in Python. The def concat_input(wildcards): line shows the name of the function (concat_input) and the variable passed to the function (the wildcards object). In the second line we add two items to a list that we call files and add the ‘.upper.txt’ suffix to each item. Finally, the function returns the list. Because the concatenate_files rule has two wildcards {first} and {second} we can access the actual strings in the wildcards object using wildcards.first and wildcards.second. When we ask for the file a_b.txt then wildcards.first == 'a' and wildcards.second == 'b'. This means that the files list returned by the function will be ['a.upper.txt', 'b.upper.txt']. 
To see for yourself you can add the following line to the function, just before the return statement: print (wildcards.first, wildcards.second, files). This way the wildcard values and the list will be printed to the terminal when you run Snakemake.\nNow that we’ve defined the function to use as input, we can use it in the concatenate_files rule. Update the rule so that it looks like this:\nrule concatenate_files:\n output:\n \"{first}_{second}.txt\"\n input:\n concat_input\n shell:\n \"\"\"\n cat {input[0]} {input[1]} > {output}\n \"\"\"\nYou see that the name of the function concat_input is added in place of the input strings. When using the wildcards object in input functions like this we have to call the function without any arguments (simply concat_input) and the function has to be defined to accept a single argument (here def concat_input(wildcards):). Let’s run the workflow with the updated rule. Remove the file a_b.txt or add -f to the Snakemake command to force a re-run:\nsnakemake a_b.txt -c 1 -f\nIf you added the print statement to the function you should see the following printed to your terminal:\nBuilding DAG of jobs...\na b ['a.upper.txt', 'b.upper.txt']\nFollowed by the rest of the workflow output.\nThere are a number of possible use-cases for input functions. For example, say that you have an experiment where you’ve sequenced three samples: sample1, sample2 and sample3 with the corresponding FASTQ files under data/ and you want to write a rule that outputs the statistics of all sequences within each sample. However, samples sample1 and sample2 have been sequenced with single-end technology while sample3 have paired-end reads. The single-end samples will have only one FASTQ file whereas the paired-end sample will have two (one for each sequenced end). Thus, depending on the name of the sample the input to the function will either be one file or two. With input functions we can write a generalized rule that can handle both types:\ndef fastq_input(wildcards):\n if wildcards.sample_id in [\"sample1\", \"sample2\"]:\n return \"data/\" + wildcards.sample_id + \".fastq.gz\"\n else:\n return [\"data/\" + wildcards.sample_id + \".R1.fastq.gz\",\n \"data/\" + wildcards.sample_id + \".R2.fastq.gz\"]\n\nrule fastq_stats:\n output:\n \"{sample_id}.stats.txt\"\n input:\n fastq_input\n shell:\n \"\"\"\n seqtk comp {input} > {output}\n \"\"\"\nAs you can see, the fastq_stats rule outputs one file {sample_id}.stats.txt and takes as input the value returned from the fastq_input function. In this function the sample id is evaluated and if it is either sample1 or sample2 (our single-end samples) then the function returns a single string which is the path to the FASTQ file for that sample. Otherwise, the function returns a list containing both the R1 and R2 files for the sample. 
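To make this concrete: since {input} expands to a space-separated list of the returned file(s), the rendered commands for a single-end and a paired-end sample would look roughly like this:\nseqtk comp data/sample1.fastq.gz > sample1.stats.txt\nseqtk comp data/sample3.R1.fastq.gz data/sample3.R2.fastq.gz > sample3.stats.txt\n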
In the shell: directive of the rule the seqtk comp command is run on the input and the output is sent to the output file.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow a simple Snakemake rule looks.\nHow to define target files when executing a workflow.\nHow to use named wildcards for writing generic and flexible rules.\nHow to use input functions in rules" + }, + { + "objectID": "pages/snakemake.html#visualising-workflows", + "href": "pages/snakemake.html#visualising-workflows", + "title": "Working with Snakemake", + "section": "3 Visualising workflows", + "text": "3 Visualising workflows\nAll that we’ve done so far could quite easily be done in a simple shell script that takes the input files as parameters. Let’s now take a look at some of the features where a WfMS like Snakemake really adds value compared to a more straightforward approach. One such feature is the possibility to visualize your workflow. Snakemake can generate three types of graphs, one that shows how the rules are connected, one that shows how the jobs (i.e. an execution of a rule with some given inputs/outputs/settings) are connected, and finally one that shows rules with their respective input/output files.\nFirst we look at the rule graph. The following command will generate a rule graph in the dot language and pipe it to the program dot, which in turn will save a visualization of the graph as a PNG file (if you’re having troubles displaying PNG files you could use SVG or JPG instead).\n\n\n\n\n\n\nCaution\n\n\n\nIf you added the print(wildcards.first,wildcards.second,files) statement to the concat_input function in the previous section you need to remove that line before running the commands below.\n\n\nsnakemake --rulegraph a_b.txt | dot -Tpng > rulegraph.png\n\nThis looks simple enough, the output from the rule convert_to_upper_case will be used as input to the rule concatenate_files.\nFor a more typical bioinformatics project it can look something like this when you include all the rules from processing of the raw data to generating figures for the paper.\n\nWhile saying that it’s easy to read might be a bit of a stretch, it definitely gives you a better overview of the project than you would have without a WfMS.\nThe second type of graph is based on the jobs, and looks like this for our little workflow (use --dag instead of --rulegraph).\nsnakemake --dag a_b.txt | dot -Tpng > jobgraph.png\n\nThe main difference here is that now each node is a job instead of a rule. You can see that the wildcards used in each job are also displayed. Another difference is the dotted lines around the nodes. A dotted line is Snakemake’s way of indicating that this rule doesn’t need to be rerun in order to generate a_b.txt. Validate this by running snakemake -n -r a_b.txt and it should say that there is nothing to be done.\nWe’ve discussed before that one of the main purposes of using a WfMS is that it automatically makes sure that everything is up to date. This is done by recursively checking that outputs are always newer than inputs for all the rules involved in the generation of your target files. Now try to change the contents of a.txt to some other text and save it. 
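For example (any change to the file will do):\necho \"This is a new version of a.txt\" > a.txt\n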
What do you think will happen if you run snakemake -n -r a_b.txt again?\n\n\n\n\n\n\nClick to show\n\n\n\n\n\n$ snakemake -n -r a_b.txt\n\nBuilding DAG of jobs...\nJob stats:\njob count min threads max threads\n--------------------- ------- ------------- -------------\nconcatenate_files 1 1 1\nconvert_to_upper_case 1 1 1\ntotal 2 1 1\n\n\n[Mon Oct 25 17:00:02 2021]\nrule convert_to_upper_case:\n input: a.txt\n output: a.upper.txt\n jobid: 1\n reason: Updated input files: a.txt\n wildcards: some_name=a\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n\n[Mon Oct 25 17:00:02 2021]\nrule concatenate_files:\n input: a.upper.txt, b.upper.txt\n output: a_b.txt\n jobid: 0\n reason: Input files updated by another job: a.upper.txt\n wildcards: first=a, second=b\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\nJob stats:\njob count min threads max threads\n--------------------- ------- ------------- -------------\nconcatenate_files 1 1 1\nconvert_to_upper_case 1 1 1\ntotal 2 1 1\n\nThis was a dry-run (flag -n). The order of jobs does not reflect the order of execution.\n\n\n\nWere you correct? Also generate the job graph and compare to the one generated above. What’s the difference? Now rerun without -n and validate that a_b.txt contains the new text (don’t forget to specify -c 1). Note that Snakemake doesn’t look at the contents of files when trying to determine what has changed, only at the timestamp for when they were last modified.\nWe’ve seen that Snakemake keeps track of if files in the workflow have changed, and automatically makes sure that any results depending on such files are regenerated. What about if the rules themselves are changed? It turns out that since version 7.8.0 Snakemake keeps track of this automatically.\nLet’s say that we want to modify the rule concatenate_files to also include which files were concatenated.\nrule concatenate_files:\n output:\n \"{first}_{second}.txt\"\n input:\n \"{first}.upper.txt\",\n \"{second}.upper.txt\"\n shell:\n \"\"\"\n echo 'Concatenating {input}' | cat - {input[0]} {input[1]} > {output}\n \"\"\"\n\n\n\n\n\n\nNote\n\n\n\nIt’s not really important for the tutorial, but the shell command used here first outputs “Concatenating” followed by a space delimited list of the files in input. This string is then sent to the program cat where it’s concatenated with input[0] and input[1] (the parameter - means that it should read from standard input). Lastly, the output from cat is sent to {output}.\n\n\nIf you now run the workflow as before you should see:\nrule concatenate_files:\n input: a.upper.txt, b.upper.txt\n output: a_b.txt\n jobid: 0\n reason: Code has changed since last execution\n wildcards: first=a, second=b\nBecause although no files involved in the workflow have been changed, Snakemake recognizes that the workflow code itself has been modified and this triggers a re-run.\nSnakemake is aware of changes to four categories of such “rerun-triggers”: “input” (changes to rule input files), “params” (changes to the rule params section), “software-env” (changes to Conda environment files specified by the conda: directive) and “code” (changes to code in the shell:, run:, script: and notebook: directives).\nPrior to version 7.8.0, only changes to the modification time of input files would trigger automatic re-runs. To run Snakemake with this previous behaviour you can use the setting --rerun-triggers mtime at the command line. 
Change the shell: section of the concatenate_files rule back to the previous version, then try running: snakemake -n -r a_b.txt --rerun-triggers mtime and you should again see Nothing to be done (all requested files are present and up to date).\nYou can also export information on how all files were generated (when, by which rule, which version of the rule, and by which commands) to a tab-delimited file like this:\nsnakemake a_b.txt -c 1 -D > summary.tsv\nThe content of summary.tsv is shown in the table below:\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\noutput_file\ndate\nrule\nversion\nlog-file(s)\ninput-file(s)\nshellcmd\nstatus\nplan\n\n\n\n\na_b.txt\nMon Oct 25 17:01:46 2021\nconcatenate_files\n-\n\na.upper.txt,b.upper.txt\ncat a.upper.txt b.upper.txt > a_b.txt\nrule implementation changed\nupdate pending\n\n\na.upper.txt\nMon Oct 25 17:01:46 2021\nconvert_to_upper_case\n-\n\na.txt\ntr [a-z] [A-Z] < a.txt > a.upper.txt\nok\nno update\n\n\nb.upper.txt\nMon Oct 25 17:01:46 2021\nconvert_to_upper_case\n-\n\nb.txt\ntr [a-z] [A-Z] < b.txt > b.upper.txt\nok\nno update\n\n\n\n\nYou can see in the second last column that the rule implementation for a_b.txt has changed. The last column shows if Snakemake plans to regenerate the files when it’s next executed. You can see that for the concatenate_files the plan is update pending because we generated the summary with the default behaviour of using all rerun-triggers.\nYou might wonder where Snakemake keeps track of all these things? It stores all information in a hidden subdirectory called .snakemake. This is convenient since it’s easy to delete if you don’t need it anymore and everything is contained in the project directory. Just be sure to add it to .gitignore so that you don’t end up tracking it with git.\nBy now you should be familiar with the basic functionality of Snakemake, and you can build advanced workflows with only the features we have discussed here. There’s a lot we haven’t covered though, in particular when it comes to making your workflow more reusable. In the following section we will start with a workflow that is fully functional but not very flexible. We will then gradually improve it, and at the same time showcase some Snakemake features we haven’t discussed yet. Note that this can get a little complex at times, so if you felt that this section was a struggle then you could move on to one of the other tutorials instead.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use --dag and --rulegraph for visualizing the job and rule graphs, respectively.\nHow Snakemake reruns relevant parts of the workflow after there have been changes.\nHow Snakemake tracks changes to files and code in a workflow" + }, + { + "objectID": "pages/snakemake.html#the-mrsa-workflow", + "href": "pages/snakemake.html#the-mrsa-workflow", + "title": "Working with Snakemake", + "section": "4 The MRSA workflow", + "text": "4 The MRSA workflow\nAs you might remember from the intro, we are attempting to understand how lytic bacteriophages can be used as a future therapy for the multi-resistant bacteria MRSA (methicillin-resistant Staphylococcus aureus). In order to do this we have performed RNA-seq of three strains, one test and two controls. We have already set up a draft Snakemake workflow for the RNA-seq analysis and it seems to be running nicely. The rest of the Snakemake tutorial will be spent improving and making this workflow more flexible!\n\n\n\n\n\n\nTip\n\n\n\nThis section will leave a little more up to you compared to the previous one. 
If you get stuck at some point the final workflow after all the modifications is available in tutorials/git/Snakefile.\n\n\nYou are probably already in your snakemake-env environment, otherwise activate it (use conda info --envs if you are unsure).\n\n\n\n\n\n\nTip\n\n\n\nHere we have one Conda environment for executing the whole Snakemake workflow. Snakemake also supports using explicit Conda environments on a per-rule basis, by specifying something like conda: rule-specific-env.yml in the rule definition and running Snakemake with the --use-conda flag. The given rule will then be run in the Conda environment specified in rule-specific-env.yml that will be created and activated on the fly by Snakemake. Note that by default Snakemake uses mamba to generate the rule-specific environments. This behaviour can be changed by running with --conda-frontend conda, which will force Snakemake to use conda instead.\n\n\nLet’s start by generating the rule graph so that we get an overview of the workflow. Here we have to specify the file with the rules using the -s flag to Snakemake since the path to the file differs from the default.\nsnakemake -s snakefile_mrsa.smk --rulegraph | dot -T png > rulegraph_mrsa.png\nThere’s another difference in this command compared to the one we’ve used before, namely that we don’t define a target. In the toy example we used a_b.txt as a target, and the wildcards were resolved based on that. How come we don’t need to do that here? It turns out that by default Snakemake targets the first rule in a workflow. By convention, we call this rule all and let it serve as a rule for aggregating the main outputs of the workflow.\n\nNow take some time and look through the workflow file and try to understand how the rules fit together. Use the rule graph as an aid. The rules represent a quite standard, although somewhat simplified, workflow for RNA-seq analysis. If you are unfamiliar with the purpose of the different operations (index genome, FastQC and so on), then take a look at the intro.\nAlso generate the job graph in the same manner. Here you can see that three samples will be downloaded: SRR935090, SRR935091, and SRR935092. The original sample files contain tens of millions of reads but for the purpose of this course we have sub-sampled them to 100,000 reads per sample, so that they are easy to manage, and made them available at the SciLifeLab Data Repository. These FASTQ files will then be quality controlled with FastQC and aligned to a genome. The QC output will be aggregated with MultiQC and the alignments will be used to generate a count table, i.e. a table that shows how many reads map to each gene for each sample. This count table is then what the downstream analysis will be based on.\n\nNow try to run the whole workflow. 
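Using the same flags as before, the command would be something like (adjust the number of cores if you want to):\nsnakemake -s snakefile_mrsa.smk -c 1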
Hopefully you see something like this.\nBuilding DAG of jobs...\nUsing shell: /bin/bash\nProvided cores: 1 (use --cores to define parallelism)\nRules claiming more threads will be scaled down.\nJob stats:\njob count min threads max threads\n-------------------- ------- ------------- -------------\nalign_to_genome 3 1 1\nall 1 1 1\nfastqc 3 1 1\ngenerate_count_table 1 1 1\ngenerate_rulegraph 1 1 1\nget_SRA_by_accession 3 1 1\nget_genome_fasta 1 1 1\nget_genome_gff3 1 1 1\nindex_genome 1 1 1\nmultiqc 1 1 1\nsort_bam 3 1 1\ntotal 19 1 1\n\nSelect jobs to execute...\n\n[Mon Oct 25 17:13:47 2021]\nrule get_genome_fasta:\n output: data/ref/NCTC8325.fa.gz\n jobid: 6\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n--2021-10-25 17:13:48-- ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz\n => ‘data/ref/NCTC8325.fa.gz’\nResolving ftp.ensemblgenomes.org (ftp.ensemblgenomes.org)... 193.62.197.75\nConnecting to ftp.ensemblgenomes.org (ftp.ensemblgenomes.org)|193.62.197.75|:21... connected.\nLogging in as anonymous ... Logged in!\n==> SYST ... done. ==> PWD ... done.\n.\n.\n[lots of stuff]\n.\n.\nlocalrule all:\n input: results/tables/counts.tsv, results/multiqc/multiqc.html, results/rulegraph.png\n jobid: 0\n resources: tmpdir=/var/folders/p0/6z00kpv16qbf_bt52y4zz2kc0000gp/T\n\n[Mon Oct 25 17:14:38 2021]\nFinished job 0.\n19 of 19 steps (100%) done\nAfter everything is done, the workflow will have resulted in a bunch of files in the directories data/ and results/. Take some time to look through the structure, in particular the quality control reports in results/multiqc/ and the count table in results/tables/.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow the MRSA workflow looks.\nHow to run the MRSA workflow.\nWhich output files the MRSA workflow produces." + }, + { + "objectID": "pages/snakemake.html#parameters", + "href": "pages/snakemake.html#parameters", + "title": "Working with Snakemake", + "section": "5 Parameters", + "text": "5 Parameters\nIn a typical bioinformatics project, considerable efforts are spent on tweaking parameters for the various programs involved. It would be inconvenient if you had to change in the shell scripts themselves every time you wanted to run with a new setting. Luckily, there is a better option for this: the params keyword.\nrule some_rule:\n output:\n \"...\"\n input:\n \"...\"\n params:\n cutoff=2.5\n shell:\n \"\"\"\n some_program --cutoff {params.cutoff} {input} {output}\n \"\"\"\nMost of the programs are run with default settings in the MRSA workflow and don’t use the params: directive. However, the get_SRA_by_accession rule is an exception. 
Here the remote address for each of the files to download is passed to the shell directive via:\ndef get_sample_url(wildcards):\n samples = {\n \"SRR935090\": \"https://figshare.scilifelab.se/ndownloader/files/39539767\",\n \"SRR935091\": \"https://figshare.scilifelab.se/ndownloader/files/39539770\",\n \"SRR935092\": \"https://figshare.scilifelab.se/ndownloader/files/39539773\"\n }\n return samples[wildcards.sample_id]\n\nrule get_SRA_by_accession:\n \"\"\"\n Retrieve a single-read FASTQ file\n \"\"\"\n output:\n \"data/{sample_id}.fastq.gz\"\n params:\n url = get_sample_url\n shell:\n \"\"\"\n wget -O - {params.url} | seqtk sample - 25000 | gzip -c > {output[0]}\n \"\"\"\nYou may recognize this from page 2 of this tutorial where we used input functions to generate strings and lists of strings for the input: section of a rule. Using a function to return values based on the wildcards also works for params:. Here sample_id is a wildcard which in this specific workflow can be either SRR935090, SRR935091, or SRR935092. The wildcards object is passed to the function get_sample_url and depending on what output the rule is supposed to generate, wildcards.sample_id will take the value of either of the three sample ids. The samples variable defined in the function is a Python dictionary that has the URLs for each sample_id hard-coded. This dictionary is used to convert the value of the sample_id wildcard to a URL, which is returned by the function. Finally, in the shell: directive we access the url parameter with {params.url}. (We could have written three separate rules to download the samples, but it’s easy to see how that can become impractical.)\nLet’s add another parameter to the get_SRA_by_accession rule. As you can see in the shell command the FASTQ file downloaded by wget gets piped directly (the -O - part means send contents to STDOUT) to the seqtk sample command which reads from STDIN and outputs 25000 randomly sampled reads (out of the 100,000 contained in the example FASTQ file). Change the rule to use the parameter max_reads instead and set the value to 20000. If you need help, click to show the solution below.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\nrule get_SRA_by_accession:\n \"\"\"\n Retrieve a single-read FASTQ file\n \"\"\"\n output:\n \"data/{sample_id}.fastq.gz\"\n params:\n url = get_sample_url,\n max_reads = 20000\n shell:\n \"\"\"\n wget -O - {params.url} | seqtk sample - {params.max_reads} | gzip -c > {output[0]}\n \"\"\"\n\n\n\nNow run through the workflow. Because there have been changes to the get_SRA_by_accession rule this will trigger a re-run of the rule for all three accessions. In addition all downstream rules that depend on output from get_SRA_by_accession are re-run.\nAs you can see the parameter values we set in the params section don’t have to be static, they can be any Python expression. In particular, Snakemake provides a global dictionary of configuration parameters called config. Let’s modify get_SRA_by_accession to look something like this in order to make use of this dictionary:\nrule get_SRA_by_accession:\n \"\"\"\n Retrieve a single-read FASTQ file\n \"\"\"\n output:\n \"data/{sample_id}.fastq.gz\"\n params:\n url = get_sample_url,\n max_reads = config[\"max_reads\"]\n shell:\n \"\"\"\n wget -O - {params.url} | seqtk sample - {params.max_reads} | gzip -c > {output[0]}\n \"\"\"\nNote that Snakemake now expects there to be a key named max_reads in the config dictionary. 
If we don’t populate the dictionary somehow it will be empty, so if you were to run the workflow now it would trigger a KeyError (try running snakemake -s snakefile_mrsa.smk -n to see for yourself). In order to populate the config dictionary with data for the workflow we could use the snakemake --config KEY=VALUE syntax directly from the command line (e.g. snakemake --config max_reads=20000 -s snakefile_mrsa.smk). However, from a reproducibility perspective, it’s not optimal to set parameters from the command line, since it’s difficult to keep track of which parameter values were used.\nA much better alternative is to use the --configfile FILE option to supply a configuration file to Snakemake. In this file we can collect all the project-specific settings, sample ids and so on. This also enables us to write the Snakefile in a more general manner so that it can be better reused between projects. Like several other files used in these tutorials, this file should be in YAML format. Create the file below and save it as config.yml.\nmax_reads: 25000\nIf we now run Snakemake with --configfile config.yml, it will parse this file to form the config dictionary. If you want to overwrite a parameter value, e.g. for testing, you can still use the --config KEY=VALUE flag, as in --config max_reads=1000.\n\n\n\n\n\n\nTip\n\n\n\nRather than supplying the config file from the command line you could also add the line configfile: \"config.yml\" to the top of your Snakefile. Keep in mind that with such a setup Snakemake will complain if the file config.yml is not present.\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to set parameter values with the params directive.\nHow to run Snakemake with the config variable and with a configuration file." + }, + { + "objectID": "pages/snakemake.html#logs", + "href": "pages/snakemake.html#logs", + "title": "Working with Snakemake", + "section": "6 Logs", + "text": "6 Logs\nAs you probably noticed it was difficult to follow how the workflow progressed since some rules printed a lot of output to the terminal. In some cases this also contained important information, such as statistics on the sequence alignments or genome indexing. This could be valuable for example if you later in the project get weird results and want to debug. It’s also important from a reproducibility perspective that the “paper trail” describing how the outputs were generated is saved. Luckily, Snakemake has a feature that can help with this. Just as we define input and output in a rule we can also define log.\nrule some_rule:\n output:\n \"...\"\n input:\n \"...\"\n log:\n \"...\"\n shell:\n \"\"\"\n echo 'Converting {input} to {output}' > {log}\n \"\"\"\nA log file is not different from any other output file, but it’s dealt with a little differently by Snakemake. For example, it’s shown in the file summary when using -D and, unlike other output files, it’s not deleted if jobs fail, which of course is necessary for debugging purposes. It’s also a good way to clarify the purpose of the file. 
We probably don’t need to save logs for all the rules, only the ones with interesting output.\n\nget_genome_fasta and get_genome_gff3 would be good to log since they are dependent on downloading files from an external server.\nmultiqc aggregates quality control data for all the samples into one html report, and the log contains information about which samples were aggregated.\nindex_genome outputs some statistics about the genome indexing.\nalign_to_genome outputs important statistics about the alignments. This is probably the most important log to save.\n\nNow add a log file to some or all of the rules above. A good place to save them to would be results/logs/rule_name/. To avoid having multiple jobs write to the same files, Snakemake requires that all output and log files contain the same wildcards, so be sure to include any wildcards used in the rule in the log name as well, e.g. {some_wildcard}.log.\nYou also have to specify in the shell section of each rule what you want the log to contain. Some of the programs we use send their log information to standard out, some to standard error and some let us specify a log file via a flag.\nFor example, in the align_to_genome rule, it could look like this (Bowtie2 writes log info to standard error):\nrule align_to_genome:\n \"\"\"\n Align a fastq file to a genome index using Bowtie 2.\n \"\"\"\n output:\n \"results/bam/{sample_id,\\w+}.bam\"\n input:\n \"data/{sample_id}.fastq.gz\",\n \"results/bowtie2/NCTC8325.1.bt2\",\n \"results/bowtie2/NCTC8325.2.bt2\",\n \"results/bowtie2/NCTC8325.3.bt2\",\n \"results/bowtie2/NCTC8325.4.bt2\",\n \"results/bowtie2/NCTC8325.rev.1.bt2\",\n \"results/bowtie2/NCTC8325.rev.2.bt2\"\n log:\n \"results/logs/align_to_genome/{sample_id}.log\"\n shell:\n \"\"\"\n bowtie2 -x results/bowtie2/NCTC8325 -U {input[0]} > {output} 2>{log}\n \"\"\"\nTo save some time you can use the info below.\n# wget has a -o flag for specifying the log file\nwget remote_file -O output_file -o {log}\n\n# MultiQC and featureCounts write to standard error so we redirect with \"2>\"\nmultiqc -n output_file input_files 2> {log}\nfeatureCounts -t gene -g gene_id -a gff_file -o output_file input_files 2>{log}\n\n# Bowtie2-build redirects to standard out so we use \">\"\nbowtie2-build input_file index_dir > {log}\nNow rerun the whole workflow. Do the logs contain what they should? Note how much easier it is to follow the progression of the workflow when the rules write to logs instead of to the terminal.\n\n\n\n\n\n\nTip\n\n\n\nIf you have a rule with a shell directive in which several commands are run and you want to save stdout and stderr for all commands into the same log file you can add exec &>{log} as the first line of the shell directive.\n\n\nIf you run with -D (or -S for a simpler version) you will see that the summary table now also contains the log file for each of the files in the workflow.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to redirect output to log files with the log directive." + }, + { + "objectID": "pages/snakemake.html#temporary-files", + "href": "pages/snakemake.html#temporary-files", + "title": "Working with Snakemake", + "section": "7 Temporary files", + "text": "7 Temporary files\nIt’s not uncommon that workflows contain temporary files that should be kept for some time and then deleted once they are no longer needed. A typical case could be that some operation generates a file, which is then compressed to save space or indexed to make searching faster. 
There is then no need to save the original output file. Take a look at the job graph for our workflow again. The output from align_to_genome is a BAM file, which contains information about all the reads for a sample and where they map in the genome. For downstream processing we need this file to be sorted by genome coordinates. This is what the rule sort_bam is for. We therefore end up with both results/bam/{sample_id}.bam and results/bam/{sample_id}.sorted.bam.\nIn Snakemake we can mark an output file as temporary like this:\noutput: temp(\"...\")\nThe file will then be deleted as soon as all jobs where it’s an input have finished. Now do this for the output of align_to_genome. We have to rerun the rule for it to trigger, so use -R align_to_genome. It should look something like this:\n.\n.\nrule sort_bam:\n input: results/bam/SRR935090.bam\n output: results/bam/SRR935090.sorted.bam\n jobid: 2\n wildcards: sample_id=SRR935090\n\nRemoving temporary output file results/bam/SRR935090.bam.\nFinished job 2.\n.\n.\n\n\n\n\n\n\nTip\n\n\n\nSometimes you may want to trigger removal of temporary files without actually rerunning the jobs. You can then use the --delete-temp-output flag. In some cases you may instead want to run only parts of a workflow and therefore want to prevent files marked as temporary from being deleted (because the files are needed for other parts of the workflow). In such cases you can use the --notemp flag.\n\n\nSnakemake has a number of options for marking files:\n\ntemp(\"...\"): The output file should be deleted once it’s no longer needed by any rules.\nprotected(\"...\"): The output file should be write-protected. Typically used to protect files that require a huge amount of computational resources from being accidentally deleted.\nancient(\"...\"): The timestamp of the input file is ignored and it’s always assumed to be older than any of the output files.\ntouch(\"...\"): The output file should be “touched”, i.e. created or updated, when the rule has finished. Typically used as “flag files” to enforce some rule execution order without real file dependencies.\ndirectory(\"...\"): The output is a directory rather than a file.\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to mark an output file as temporary for automatic removal." + }, + { + "objectID": "pages/snakemake.html#targets", + "href": "pages/snakemake.html#targets", + "title": "Working with Snakemake", + "section": "8 Targets", + "text": "8 Targets\nWe’ve mentioned that Snakemake rules take either strings or a list of strings as input, and that we can use any Python expression in Snakemake workflows. Here we’ll show how these features help us condense the code of rules.\nConsider the rule align_to_genome below.\nrule align_to_genome:\n \"\"\"\n Align a fastq file to a genome index using Bowtie 2.\n \"\"\"\n output:\n \"results/bam/{sample_id}.bam\"\n input:\n \"data/{sample_id}.fastq.gz\",\n \"results/bowtie2/NCTC8325.1.bt2\",\n \"results/bowtie2/NCTC8325.2.bt2\",\n \"results/bowtie2/NCTC8325.3.bt2\",\n \"results/bowtie2/NCTC8325.4.bt2\",\n \"results/bowtie2/NCTC8325.rev.1.bt2\",\n \"results/bowtie2/NCTC8325.rev.2.bt2\"\n shell:\n \"\"\"\n bowtie2 -x results/bowtie2/NCTC8325 -U {input[0]} > {output}\n \"\"\"\nHere we have seven inputs; the FASTQ file with the reads and six files with similar file names from the Bowtie2 genome indexing. Instead of writing all the filenames we can tidy this up by using a Python expression to generate a list of these files instead. 
If you’re familiar with Python you could do this with list comprehensions like this:\ninput:\n \"data/{sample_id}.fastq.gz\",\n [f\"results/bowtie2/NCTC8325.{substr}.bt2\" for\n substr in [\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"]]\nThis will take the elements of the list of substrings one by one, and insert that element in the place of {substr}. Since this type of aggregating rule is quite common, Snakemake also has a more compact way of achieving the same thing.\ninput:\n \"data/{sample_id}.fastq.gz\",\n expand(\"results/bowtie2/NCTC8325.{substr}.bt2\",\n substr = [\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"])\n\n\n\n\n\n\nCaution\n\n\n\nWhen using expand() like this, substr is not a wildcard because it is resolved to the values explicitly given inside the expand expression.\n\n\nNow change the rules index_genome and align_to_genome to use the expand() expression.\nIn the workflow we decide which samples to run by including the SRR ids in the names of the inputs to the rules multiqc and generate_count_table:\nrule generate_count_table:\n output:\n \"results/tables/counts.tsv\"\n input:\n bams = [\"results/bam/SRR935090.sorted.bam\",\n \"results/bam/SRR935091.sorted.bam\",\n \"results/bam/SRR935092.sorted.bam\"],\n...\nrule multiqc:\n output:\n html = \"results/multiqc/multiqc.html\",\n stats = \"results/multiqc/multiqc_general_stats.txt\"\n input:\n \"results/fastqc/SRR935090_fastqc.zip\",\n \"results/fastqc/SRR935091_fastqc.zip\",\n \"results/fastqc/SRR935092_fastqc.zip\"\nThe output files from these two rules, results/multiqc/multiqc.html and results/tables/counts.tsv, are in turn specified as input to the all rule at the top of the file. Because the first rule is targeted by default when we run Snakemake on the command line (like we mentioned in snakemake-4-the-mrsa-workflow) this is what triggers the rules to run on each of the three samples.\nHowever, this is a potential source of errors since it’s easy to change in one place and forget to change in the other. Because we can use Python code “everywhere” let’s instead define a list of sample ids and put it at the very top of the Snakefile, just before the rule all:\nSAMPLES = [\"SRR935090\", \"SRR935091\", \"SRR935092\"]\nNow use expand() in multiqc and generate_count_table to use SAMPLES for the sample ids. For the multiqc rule it could look like this:\ninput:\n expand(\"results/fastqc/{sample_id}_fastqc.zip\", sample_id = SAMPLES)\nSee if you can update the generate_count_table rule in the same manner!\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use the expand() expression to create a list with file names, inserting all provided wildcard values."
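As a final note on expand(): if you pass several lists it will generate all combinations of the values, which is good to keep in mind if you add more wildcards later on. A small standalone illustration (the file name pattern here is made up):\nexpand(\"{sample}.{ext}\", sample = [\"a\", \"b\"], ext = [\"txt\", \"log\"])\n# gives four file names, one for each combination of sample and ext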
+ }, + { + "objectID": "pages/snakemake.html#shadow-rules", + "href": "pages/snakemake.html#shadow-rules", + "title": "Working with Snakemake", + "section": "9 Shadow rules", + "text": "9 Shadow rules\nTake a look at the index_genome rule below:\nrule index_genome:\n \"\"\"\n Index a genome using Bowtie 2.\n \"\"\"\n output:\n index = expand(\"results/bowtie2/NCTC8325.{substr}.bt2\",\n substr = [\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"])\n input:\n \"data/NCTC8325.fa.gz\"\n log:\n \"results/logs/index_genome/NCTC8325.log\"\n shell:\n \"\"\"\n # Bowtie2 cannot use .gz, so unzip to a temporary file first\n gunzip -c {input} > tempfile\n bowtie2-build tempfile results/bowtie2/NCTC8325 >{log}\n\n # Remove the temporary file\n rm tempfile\n \"\"\"\nThere is a temporary file here called tempfile which is the uncompressed version of the input, since Bowtie2 cannot use compressed files. There are a number of drawbacks with having files that aren’t explicitly part of the workflow as input/output files to rules:\n\nSnakemake cannot clean up these files if the job fails, as it would do for normal output files.\nIf several jobs are run in parallel there is a risk that they write to tempfile at the same time. This can lead to very scary results.\nSometimes we don’t know the names of all the files that a program can generate. It is, for example, not unusual that programs leave some kind of error log behind if something goes wrong.\n\nAll of these issues can be dealt with by using the shadow option for a rule. The shadow option results in that each execution of the rule is run in an isolated temporary directory (located in .snakemake/shadow/ by default). There are a few options for shadow (for the full list of these options see the Snakemake docs). The most simple is shadow: \"minimal\", which means that the rule is executed in an empty directory that the input files to the rule have been symlinked into. For the rule below, that means that the only file available would be input.txt. The shell commands would generate the files some_other_junk_file and output.txt. Lastly, Snakemake will move the output file (output.txt) to its “real” location and remove the whole shadow directory. We therefore never have to think about manually removing some_other_junk_file.\nrule some_rule:\n output:\n \"output.txt\"\n input:\n \"input.txt\"\n shadow: \"minimal\"\n shell:\n \"\"\"\n touch some_other_junk_file\n cp {input} {output}\n \"\"\"\nTry this out for the rules where we have to “manually” deal with files that aren’t tracked by Snakemake (multiqc, index_genome). Also remove the shell commands that remove temporary files from those rules, as they are no longer needed. Now rerun the workflow and validate that the temporary files don’t show up in your working directory.\n\n\n\n\n\n\nTip\n\n\n\nSome people use the shadow option for almost every rule and some never use it at all. One thing to keep in mind is that it leads to some extra file operations when the outputs are moved to their final location. This is no issue when the shadow directory is on the same disk as the output directory, but if you’re running on a distributed file system and generate very many or very large files it might be worth considering other options (see e.g. the --shadow-prefix flag).\n\n\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to use the shadow option to handle files that are not tracked by Snakemake." 
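For reference, the --shadow-prefix flag mentioned in the tip above is given on the command line, for example like this (the path is just a placeholder for a suitable scratch location on your system):\nsnakemake -s snakefile_mrsa.smk -c 1 --shadow-prefix /path/to/scratch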
+ }, + { + "objectID": "pages/snakemake.html#generalising-workflows", + "href": "pages/snakemake.html#generalising-workflows", + "title": "Working with Snakemake", + "section": "10 Generalising workflows", + "text": "10 Generalising workflows\nIt’s a good idea to separate project-specific parameters from the actual implementation of the workflow. This allows anyone using the workflow to modify its behaviour without changing the underlying code, making the workflow more general.\nIn order to generalize our RNA-seq analysis workflow we should move all project-specific information to config.yml. This means that we want the config file to:\n\nSpecify which samples to run.\nSpecify which genome to align to and where to download its sequence and annotation files.\n(Contain any other parameters we might need to make it into a general workflow, e.g. to support both paired-end and single-read sequencing)\n\n\n\n\n\n\n\nNote\n\n\n\nPutting all configuration in config.yml will break the generate_rulegraph rule. You can fix it either by replacing --config max_reads=0 with --configfile=config.yml in the shell command of that rule in the Snakefile, or by adding configfile: \"config.yml\" to the top of the Snakefile (as mentioned in a previous tip).\n\n\nThe first point is straightforward; rather than using SAMPLES = [\"...\"] in the Snakefile we define it as a parameter in config.yml. You can either add it as a list similar to the way it was expressed before by adding:\nSAMPLES: [\"SRR935090\", \"SRR935091\", \"SRR935092\"]\nTo config.yml, or you can use this YAML notation (whether you choose SAMPLES or sample_ids as the name of the entry doesn’t matter, you will just have to reference the same name in the config dictionary inside the workflow):\nsample_ids:\n - SRR935090\n - SRR935091\n - SRR935092\nChange the workflow to reference config[\"sample_ids\"] (if using the latter example) instead of SAMPLES, as in:\nexpand(\"results/fastqc/{sample_id}_fastqc.zip\",\n sample_id = config[\"sample_ids\"])\nRemove the line with SAMPLES = [\"SRR935090\", \"SRR935091\", \"SRR935092\"] that we added to the top of snakefile_mrsa.smk in Snakemake 8: Targets.\nDo a dry-run afterwards to make sure that everything works as expected.\nYou may remember from the snakemake-5-parameters part of this tutorial that we’re using a function to return the URL of the FASTQ files to download for each sample:\ndef get_sample_url(wildcards):\n samples = {\n \"SRR935090\": \"https://figshare.scilifelab.se/ndownloader/files/39539767\",\n \"SRR935091\": \"https://figshare.scilifelab.se/ndownloader/files/39539770\",\n \"SRR935092\": \"https://figshare.scilifelab.se/ndownloader/files/39539773\"\n }\n return samples[wildcards.sample_id]\nHere the URLs of each sample_id is hard-coded in the samples dictionary inside the function. To generalize this function we can move the definition to the config file, placing it for example under an entry that we call sample_urls like this:\nsample_urls:\n SRR935090: \"https://figshare.scilifelab.se/ndownloader/files/39539767\"\n SRR935091: \"https://figshare.scilifelab.se/ndownloader/files/39539770\"\n SRR935092: \"https://figshare.scilifelab.se/ndownloader/files/39539773\"\nThis is what’s called ‘nested’ key/value pairs, meaning that each sample_id -> URL pair becomes nested under the config key sample_urls. So in order to access the URL of e.g. SRR935090 we would use config[\"sample_urls\"][\"SRR935090\"]. 
This means that you will have to update the get_sample_url function to:\ndef get_sample_url(wildcards):\n return config[\"sample_urls\"][wildcards.sample_id]\nNow the function uses the global config dictionary to return URLs for each sample_id. Again, do a dry-run to see that the new implementation works.\n\n\n\n\n\n\nTip\n\n\n\nIf you were to scale up this workflow with more samples it could become impractical to have to define the URLs by hand in the config file. A tip then is to have a separate file where samples are listed in one column and the URLs (or file paths) in another column. With a few lines of python code you could then read that list at the start of the workflow and add each sample to the config dictionary.\n\n\nNow let’s take a look at the genome reference used in the workflow. In the get_genome_fasta and get_genome_gff3 rules we have hard-coded FTP paths to the FASTA and GFF3 annotation files for the genome NCTC8325. We can generalize this in a similar fashion to what we did with the get_SRA_by_accession rule. Let’s add a nested entry called genomes to the config file that will hold the genome id and FTP paths to the FASTA and GFF file:\ngenomes:\n NCTC8325:\n fasta: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz\n gff3: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.37.gff3.gz\n ST398:\n fasta: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection//staphylococcus_aureus_subsp_aureus_st398/dna/Staphylococcus_aureus_subsp_aureus_st398.ASM958v1.dna.toplevel.fa.gz\n gff3: ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/gff3/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_st398//Staphylococcus_aureus_subsp_aureus_st398.ASM958v1.37.gff3.gz\nAs you can see this is very similar to what we did with sample_urls, just that we have one more nested level. Now to access the FTP path to the FASTA file for genome id NCTC8325 we can use config[\"genomes\"][\"NCTC8325\"][\"fasta\"].\nLet’s now look at how to do the mapping from genome id to FASTA path in the rule get_genome_fasta. This is how the rule currently looks (if you have added the log section as previously described).\nrule get_genome_fasta:\n \"\"\"\n Retrieve the sequence in fasta format for a genome.\n \"\"\"\n output:\n \"data/ref/NCTC8325.fa.gz\"\n log:\n \"results/logs/get_genome_fasta/NCTC8325.log\"\n shell:\n \"\"\"\n wget -o {log} ftp://ftp.ensemblgenomes.org/pub/bacteria/release-37/fasta/bacteria_18_collection/staphylococcus_aureus_subsp_aureus_nctc_8325/dna//Staphylococcus_aureus_subsp_aureus_nctc_8325.ASM1342v1.dna_rm.toplevel.fa.gz -O {output}\n \"\"\"\nWe don’t want the hard-coded genome id NCTC8325, so replace that with a wildcard, say {genome_id} (remember to add the wildcard to the log: directive as well). We now need to supply the remote paths to the FASTA file for a given genome id. 
Because we’ve added this information to the config file we just need to pass it to the rule in some way, and just like in the get_SRA_by_accession rule we’ll use a function to do the job:\ndef get_fasta_path(wildcards):\n return config[\"genomes\"][wildcards.genome_id][\"fasta\"]\n\nrule get_genome_fasta:\n \"\"\"\n Retrieve the sequence in fasta format for a genome.\n \"\"\"\n output:\n \"data/ref/{genome_id}.fa.gz\"\n log:\n \"results/logs/get_genome_fasta/{genome_id}.log\"\n params:\n fasta_path = get_fasta_path\n shell:\n \"\"\"\n wget -o {log} {params.fasta_path} -O {output}\n \"\"\"\nNow change the get_genome_gff3 rule in a similar manner. Click to see the solution below if you’re having trouble.\n\n\n\n\n\n\nClick to show\n\n\n\n\n\ndef get_gff_path(wildcards):\n return config[\"genomes\"][wildcards.genome_id][\"gff3\"]\n\nrule get_genome_gff3:\n \"\"\"\n Retrieve annotation in gff3 format for a genome.\n \"\"\"\n output:\n \"data/ref/{genome_id}.gff3.gz\"\n log:\n \"results/logs/get_genome_gff3/{genome_id}.log\"\n params:\n gff3_path = get_gff_path\n shell:\n \"\"\"\n wget -o {log} {params.gff3_path} -O {output}\n \"\"\"\n\n\n\nAlso change index_genome to use a wildcard rather than a hard-coded genome id. Here you will run into a complication if you have followed the previous instructions and use the expand() expression. We want the list to expand to [\"results/bowtie2/{genome_id}.1.bt2\", \"results/bowtie2/{genome_id}.2.bt2\", ...], i.e. only expanding the wildcard referring to the Bowtie2 index. To keep the genome_id wildcard from being expanded we have to “mask” it with double curly brackets: {{genome_id}}. In addition, we need to replace the hard-coded results/bowtie2/NCTC8325 in the shell directive of the rule with the genome id wildcard. Inside the shell directive the wildcard object is accessed with this syntax: {wildcards.genome_id}, so the Bowtie2-build command should be:\nbowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}\nNote that this will only work if the {genome_id} wildcard can be resolved to something defined in the config (currently NCTC8325 or ST398). If you try to generate a FASTA file for a genome id not defined in the config Snakemake will complain, even at the dry-run stage.\nFinally, remember that any wildcards need to be present both in the output: and log: directives? This means we have to update the log: directive in index_genome as well. The final rule should look like this:\nrule index_genome:\n \"\"\"\n Index a genome using Bowtie 2.\n \"\"\"\n output:\n expand(\"results/bowtie2/{{genome_id}}.{substr}.bt2\",\n substr = [\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"])\n input:\n \"data/ref/{genome_id}.fa.gz\"\n log:\n \"results/logs/index_genome/{genome_id}.log\"\n shadow: \"minimal\"\n shell:\n \"\"\"\n # Bowtie2 cannot use .gz, so unzip to a temporary file first\n gunzip -c {input} > tempfile\n bowtie2-build tempfile results/bowtie2/{wildcards.genome_id} > {log}\n \"\"\"\nGood job! The rules get_genome_fasta, get_genome_gff3 and index_genome can now download and index any genome as long as we provide valid links in the config file.\nHowever, we need to define somewhere which genome id we actually want to use when running the workflow. This needs to be done both in align_to_genome and generate_count_table. Do this by introducing a parameter in config.yml called \"genome_id\" (you can set it to either NCTC8325 or ST398), e.g.:\ngenome_id: \"NCTC8325\"\nNow we can resolve the genome_id wildcard from the config. 
See below for an example for align_to_genome. Here the substr wildcard gets expanded from a list while genome_id gets expanded from the config file.\ninput:\n \"data/{sample_id}.fastq.gz\",\n index = expand(\"results/bowtie2/{genome_id}.{substr}.bt2\",\n genome_id = config[\"genome_id\"],\n substr = [\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"])\nAlso change the hard-coded genome id in the generate_count_table input in a similar manner:\nrule generate_count_table:\n \"\"\"\n Generate a count table using featureCounts.\n \"\"\"\n output:\n \"results/tables/counts.tsv\",\n \"results/tables/counts.tsv.summary\"\n input:\n bams=expand(\"results/bam/{sample_id}.sorted.bam\",\n sample_id = config[\"sample_ids\"]),\n annotation=expand(\"data/ref/{genome_id}.gff3.gz\",\n genome_id = config[\"genome_id\"])\n log:\n \"results/logs/generate_count_table.log\"\n shell:\n \"\"\"\n featureCounts -t gene -g gene_id -a {input.annotation} -o {output[0]} {input.bams} 2>{log}\n \"\"\"\nIn general, we want the rules as far downstream as possible in the workflow to be the ones that determine what the wildcards should resolve to. In our case this is align_to_genome and generate_count_table. You can think of it like the rule that really “needs” the file asks for it, and then it’s up to Snakemake to determine how it can use all the available rules to generate it. Here the align_to_genome rule says “I need this genome index to align my sample to” and then it’s up to Snakemake to determine how to download and build the index.\nOne last thing is to change the hard-coded NCTC8325 in the shell: directive of align_to_genome. Bowtie2 expects the index name supplied with the -x flag to be without the “.*.bt2” suffix so we can’t use -x {input.index}. Instead we’ll insert the genome_id directly from the config like this:\nshell:\n \"\"\"\n bowtie2 -x results/bowtie2/{config[genome_id]} -U {input[0]} > {output} 2>{log}\n \"\"\"\n\n\n\n\n\n\nSummary\n\n\n\nWell done! You now have a complete Snakemake workflow with a number of excellent features:\n\nA general RNA-seq pipeline which can easily be reused between projects, thanks to clear separation between code and settings.\nGreat traceability due to logs and summary tables.\nClearly defined the environment for the workflow using Conda.\nThe workflow is neat and free from temporary files due to using temp() and shadow.\nA logical directory structure which makes it easy to separate data and results of different software packages.\nA project set up in a way that makes it very easy to distribute and reproduce either via Git, Snakemake’s --archive option or a Docker image." + }, + { + "objectID": "pages/snakemake.html#reading-samples-from-a-file-instead-of-hard-coding-them", + "href": "pages/snakemake.html#reading-samples-from-a-file-instead-of-hard-coding-them", + "title": "Working with Snakemake", + "section": "11 Reading samples from a file instead of hard-coding them", + "text": "11 Reading samples from a file instead of hard-coding them\nSo far we’ve specified the samples to use in the workflow either as a hard-coded list in the Snakefile, or as a list in the configuration file. This is of course impractical for large real-world examples. Here we’ll just quickly show how you could supply the samples instead via a tab-separated file. 
For example you could create a file called samples.tsv with the following content:\nSRR935090 https://figshare.scilifelab.se/ndownloader/files/39539767\nSRR935091 https://figshare.scilifelab.se/ndownloader/files/39539770\nSRR935092 https://figshare.scilifelab.se/ndownloader/files/39539773\nThe first column has the sample id and the second column has the url to the fastq file. Now in order to read this into the workflow we need to use a few lines of python code. Since you can mix python code with rule definitions in Snakemake we’ll just add the following lines to the top of the Snakefile:\n# define an empty 'samples' dictionary\nsamples = {}\n# read the sample list file and populate the dictionary\nwith open(\"samples.tsv\", \"r\") as fhin:\n for line in fhin:\n # strip the newline character from the end of the line\n # then split by tab character to get the sample id and url\n sample_id, url = line.strip().split(\"\\t\")\n # store the url in the dictionary with the sample id as key\n samples[sample_id] = url\nNow we can use the samples dictionary in the workflow. For example, to get the url for SRR935090 we can use samples[\"SRR935090\"].\nFor example, the get_sample_url function can now be written as:\ndef get_sample_url(wildcards):\n return samples[wildcards.sample_id]\nWe can also use the samples dictionary in expand(), for example in the multiqc rule:\nrule multiqc:\n \"\"\"\n Aggregate all FastQC reports into a MultiQC report.\n \"\"\"\n output:\n html=\"results/multiqc/multiqc.html\",\n stats=\"results/multiqc/multiqc_general_stats.txt\"\n input:\n expand(\"results/fastqc/{sample_id}_fastqc.zip\", sample_id = samples.keys())\n log:\n \"results/logs/multiqc/multiqc.log\"\n shadow: \"minimal\"\n shell:\n \"\"\"\n # Run multiQC and keep the html report\n multiqc -n multiqc.html {input} 2> {log}\n mv multiqc.html {output.html}\n mv multiqc_data/multiqc_general_stats.txt {output.stats}\n \"\"\"\nNow this depends on there being a samples.tsv file in the working directory. To make this a configurable parameter we can add it to the config file:\nsample_list: \"samples.tsv\"\nand update the code for populating the samples dictionary:\n# define an empty 'samples' dictionary\nsamples = {}\n# read the sample list file and populate the dictionary\nwith open(config[\"sample_list\"], \"r\") as fhin:\n for line in fhin:\n # strip the newline character from the end of the line\n # then split by tab character to get the sample id and url\n sample_id, url = line.strip().split(\"\\t\")\n # store the url in the dictionary with the sample id as key\n samples[sample_id] = url\nThis way, anyone can take our Snakefile and just update the path to their own sample_list using the config file.\n\n\n\n\n\n\nQuick recap\n\n\n\nIn this section we’ve learned:\n\nHow to generalize a Snakemake workflow." + }, + { + "objectID": "pages/snakemake.html#extra-material", + "href": "pages/snakemake.html#extra-material", + "title": "Working with Snakemake", + "section": "12 Extra material", + "text": "12 Extra material\nIf you want to read more about Snakemake in general you can find several resources here:\n\nThe Snakemake documentation is available on ReadTheDocs.\nHere is another (quite in-depth) tutorial.\nIf you have questions, check out stack overflow.\n\n\n12.1 Using containers in Snakemake\nSnakemake also supports defining an Apptainer or Docker container for each rule (you will have time to work on the Containers tutorial later during the course). 
Analogous to using a rule-specific Conda environment, specify container: \"docker://some-account/rule-specific-image\" in the rule definition. Instead of a link to a container image, it is also possible to provide the path to a *.sif file (= a Singularity image file). When executing Snakemake, add the --software-deployment-method apptainer (or the shorthand --sdm apptainer) flag to the command line. For the given rule, an Apptainer container will then be created from the image or file that is provided in the rule definition on the fly by Snakemake and the rule will be run in this container.\nYou can find pre-made Apptainer or Docker images for many tools on https://biocontainers.pro/ (bioinformatics-specific) or on https://hub.docker.com/.\nHere is an example for a rule and its execution:\nrule align_to_genome:\n output:\n temp(\"results/bam/{sample_id,\\w+}.bam\")\n input:\n fastq = \"data/{sample_id}.fastq.gz\",\n index = expand(\"results/bowtie2/{genome_id}.{substr}.bt2\",\n genome_id=config[\"genome_id\"],\n substr=[\"1\", \"2\", \"3\", \"4\", \"rev.1\", \"rev.2\"])\n log:\n expand(\"results/logs/align_to_genome/{{sample_id}}_{genome_id}.log\",\n genome_id = config[\"genome_id\"])\n container: \"docker://quay.io/biocontainers/bowtie2:2.5.0--py310h8d7afc0_0\"\n shell:\n \"\"\"\n bowtie2 -x results/bowtie2/{config[genome_id]} -U {input.fastq} > {output} 2>{log}\n \"\"\"\nStart your Snakemake workflow with the following command:\nsnakemake --software-deployment-method apptainer\nFeel free to modify the MRSA workflow according to this example. As Apptainer is a container software that was developed for HPC clusters, and for example the Mac version is still a beta version, it might not work to run your updated Snakemake workflow with Apptainer locally on your computer. In the next section we explain how you can run Snakemake workflows on UPPMAX where Apptainer is pre-installed.\n\n\n12.2 Running Snakemake workflows on HPC clusters\nIf you need to run a Snakemake workflow on a high-performance computing (HPC) cluster you have a wide range of options at your disposal. Via the plugin catalog you can find plugins that will add support for various HPC schedulers to Snakemake.\nHere we will focus on how to run Snakemake workflows on clusters with SLURM, a workload manager commonly used on HPC clusters in Sweden such as Rackham, Tetralith and Dardel.\n\n\n\n\n\n\nTip\n\n\n\nWhen running on remote clusters we highly recommend to use a session manager like tmux or screen so that you can run your workflow in a session in the background while doing other things on the cluster or even logging out of the cluster.\n\n\n\n12.2.1 Option 1: Run the entire workflow as a single job\nFor short workflows with only a few rules that need the same compute resources in terms of CPU (cores) and memory, you can submit the entire workflow as a job directly to the SLURM scheduler, or start an interactive job (in your tmux or screen session) and run your Snakemake workflow as you would do that on your local machine. Make sure to give your job enough time to finish running all rules of your Snakemake workflow.\nIf you choose this option, you don’t need to install anything from the plugin catalogue. 
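As a rough sketch, a batch script for this could look something like the following (the account, partition, time and core values are placeholders that you need to adapt to your cluster and project):\n#!/bin/bash\n#SBATCH -A naiss-2023-01-001\n#SBATCH -p shared\n#SBATCH -t 04:00:00\n#SBATCH -c 4\n\n# activate the environment with your workflow tools first if needed, then\n# run the entire workflow inside this single SLURM job\nsnakemake -s snakefile_mrsa.smk -c 4\nYou would then submit the script with sbatch, or run the same snakemake command directly in an interactive job.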
However, your workflow may not run as efficiently as it could if you were to add SLURM support in Snakemake.\n\n\n12.2.2 Option 2: Use built-in SLURM support\nFor workflows with long run times and/or where each rule requires different compute resources, Snakemake comes with built-in functionality for interacting with the SLURM workload manager, sending each rule as a job to the SLURM queue and tracking the status of each job.\nIn this case, you can start the workflow on the login node and let it run there until all jobs have finished. Given that workflows often consist of many rules, some of which may be highly resource demanding, this is the option we recommend when running most Snakemake workflows on HPC clusters.\nTo add SLURM support to Snakemake you first need to install the SLURM plugin from the plugin catalog. This can be done with conda:\nconda install -c conda-forge snakemake-executor-plugin-slurm\nOnce installed, adding the --executor slurm flag to your Snakemake command line call will enable the plugin. You also need to specify how many jobs Snakemake can submit to the SLURM queue at the same time with the -j flag. For example, to allow up to 100 jobs to be put into the queue at any given time, you would run Snakemake with the following command:\nsnakemake --executor slurm -j 100 <other flags>\n\n\n\n12.3 Specifying resources for SLURM\nDepending on the cluster you are using, you will need to specify some resource requirements for the rules in your workflow, such as the number of CPUs, memory, runtime and account id. This can be done either:\n\ndirectly on the command line with the --default-resources flag which sets default resource settings for all rules\nin the rule definition of your workflow using the resources: directive, or\nin a configuration profile, a folder with a config.yaml file that contains the resource settings.\n\nYou can also use a combination of these methods. For example, the SLURM account id (e.g. naiss-2023-01-001), which will most likely be the same for all rules, can be set with --default-resources:\nsnakemake --executor slurm -j 100 --default-resources slurm_account=naiss-2023-01-001\nRule-specific resources such as runtime, memory and number of CPUs can be set in the rule definition, for example:\nrule testrule:\n output:\n \"results/output.txt\"\n resources:\n runtime = 60,\n mem_mb = 16000,\n cpus_per_task = 4\n shell:\n \"\"\"\n uname -a > {output}\n \"\"\"\nThis rule uses the standard resource runtime to set the maximum allowed time (in minutes) for the rule, sets the memory requirement with mem_mb and the number of requested CPUs with cpus_per_task. In this example the rule will have a time limit of 60 minutes, will require 16G of RAM and 4 CPUs.\nSome clusters also require you to specify the partition you want to run your job on. The partition name will differ between clusters, for example the Rackham cluster uses core and node partitions, while Dardel uses e.g. shared and main. See the documentation for the cluster you are using for more information.\nThe partition can be set with the slurm_partition resource, for example like so:\nrule testrule:\n output:\n \"results/output.txt\"\n resources:\n runtime = 60,\n mem_mb = 16000,\n cpus_per_task = 4,\n slurm_partition = \"shared\"\n shell:\n \"\"\"\n uname -a > {output}\n \"\"\"\nTo make it easy to adapt your workflow to different compute clusters it is recommended to define resource settings in a configuration profile. 
A configuration profile is a folder with a config.yaml file that contains values for Snakemake command line arguments, allowing you to modify the behavior of Snakemake without changing the workflow code. For example, you could create a dardel folder (e.g. in the root of your workflow) with a config.yaml file that contains the following:\nexecutor: \"slurm\"\njobs: 100\ndefault-resources:\n slurm_account: \"naiss-2023-01-001\"\n slurm_partition: \"shared\"\n mem_mb: 16000\n cpus_per_task: 4\n runtime: 60\nThis YAML-formatted file contains Snakemake command line arguments that will be used when running the workflow. You can then run Snakemake with the --profile flag pointing to the folder containing the config.yaml file:\nsnakemake --profile dardel\nThis greatly simplifies running the workflow on different clusters, and makes the command line call much more succinct.\nTo set rule-specific resources in the configuration profile, you can add a set-resources: section to the config.yaml file:\nexecutor: \"slurm\"\njobs: 100\ndefault-resources:\n slurm_account: \"naiss-2023-01-001\"\n slurm_partition: \"shared\"\n mem_mb: 16000\n cpus_per_task: 4\n runtime: 60\nset-resources:\n index_genome:\n runtime: 240\n mem_mb: 32000\n cpus_per_task: 8\n align_to_genome:\n runtime: 120\n mem_mb: 24000\n cpus_per_task: 6\nIn this example, the index_genome rule will have a runtime of 240 minutes, will require 32G of RAM and 8 CPUs, while the align_to_genome rule will have a runtime of 120 minutes, will require 24G of RAM and 6 CPUs. Both rules will use the slurm_account and slurm_partition settings from the default-resources section, unless overridden in the rule-specific settings.\nYou can still define resources in the rule definition, but the values in the configuration profile will take precedence.\nNow, when you run your Snakemake workflow with:\nsnakemake --profile dardel\nSnakemake will submit each job to the SLURM queue and inform you about both the local jobid and the SLURM jobid by writing something similar to this to your terminal:\nJob 0 has been submitted with SLURM jobid 37099380 (log: .snakemake/slurm_logs/rule_name/37099380.log).\nIn this example the log output from the job will be in .snakemake/slurm_logs/rule_name/37099380.log.\nYou can read more details about running Snakemake on compute clusters in the Snakemake docs." 
} ] \ No newline at end of file diff --git a/sitemap.xml b/sitemap.xml index 3875e2ea..415d77cc 100644 --- a/sitemap.xml +++ b/sitemap.xml @@ -4,10 +4,22 @@ https://nbisweden.github.io/workshop-reproducible-research/pages/take-down.html 2024-10-04T16:44:47.576Z + + https://nbisweden.github.io/workshop-reproducible-research/pages/quarto.html + 2024-10-15T14:34:31.985Z + + + https://nbisweden.github.io/workshop-reproducible-research/pages/markdown.html + 2024-10-15T06:49:44.656Z + https://nbisweden.github.io/workshop-reproducible-research/pages/introduction.html 2024-10-04T16:44:47.568Z + + https://nbisweden.github.io/workshop-reproducible-research/pages/containers.html + 2024-10-15T06:55:22.700Z + https://nbisweden.github.io/workshop-reproducible-research/lectures/snakemake/snakemake.html 2024-10-04T16:44:47.455Z @@ -68,12 +80,24 @@ https://nbisweden.github.io/workshop-reproducible-research/lectures/quarto/quarto.html 2024-10-04T16:44:47.451Z + + https://nbisweden.github.io/workshop-reproducible-research/pages/conda.html + 2024-10-15T05:51:51.919Z + https://nbisweden.github.io/workshop-reproducible-research/pages/git.html 2024-10-04T16:44:47.528Z - https://nbisweden.github.io/workshop-reproducible-research/pages/markdown.html - 2024-10-04T16:44:47.570Z + https://nbisweden.github.io/workshop-reproducible-research/pages/jupyter.html + 2024-10-15T14:15:07.462Z + + + https://nbisweden.github.io/workshop-reproducible-research/pages/nextflow.html + 2024-10-15T08:14:28.350Z + + + https://nbisweden.github.io/workshop-reproducible-research/pages/snakemake.html + 2024-10-15T07:42:52.382Z