diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5534be6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+build/*
+data/*
+*.env
+*.egg-info
+*.pyc
+*.rdb
+.vscode/*
+dist/*
+.DS_Store
+*.sqlite3
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..2dde64a
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+FROM alephdata/memorious:latest
+
+COPY setup.py /crawlers/
+COPY src /crawlers/src
+RUN pip3 install -q -e /crawlers
+COPY config /crawlers/config
+
+ENV MEMORIOUS_BASE_PATH=/data \
+    MEMORIOUS_CONFIG_PATH=/crawlers/config \
+    MEMORIOUS_DEBUG=false \
+    ARCHIVE_PATH=/data/archive \
+    REDIS_URL=redis://redis:6379/0 \
+    MEMORIOUS_DATASTORE_URI=postgresql://datastore:datastore@datastore/datastore
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..0ec5f09
--- /dev/null
+++ b/README.md
@@ -0,0 +1,5 @@
+# Memorious example project
+
+This folder can be used as an example template of a Memorious deployment.
+Copy it into its own git repository as a starting point, then add your
+own crawlers and scrapers as needed.
diff --git a/config/covid19.yml b/config/covid19.yml
new file mode 100644
index 0000000..23ca9ca
--- /dev/null
+++ b/config/covid19.yml
@@ -0,0 +1,54 @@
+# Scraper for Dutch COVID-19 Wob (freedom of information) publications.
+# It starts from a search endpoint and stores the web pages and other
+# documents linked from the results.
+name: covid19
+
+# A title for display in the UI:
+description: "Covid19 NL"
+
+pipeline:
+  init:
+    # This first stage will get the ball rolling with a seed URL.
+    method: seed
+    params:
+      urls:
+        - https://do-ams3-17.hw.webhare.net/services/wobcovid19-prod-1/search/?first=0&count=10000&orderby=publicationdate
+    handle:
+      pass: fetch
+
+  fetch:
+    # Download the seed page
+    method: fetch
+    params:
+      # These rules specify which pages should be scraped or included:
+      rules:
+        and:
+          - domain: webhare.net
+          # - not:
+          #     or:
+          #       - mime_group: assets
+          #       - mime_group: images
+    handle:
+      pass: parse
+
+  parse:
+    # Parse the scraped pages to find if they contain additional links.
+    method: parse
+    params:
+      # Additional rules to determine if a scraped page should be stored or not.
+      # In this example, we keep web pages as well as PDFs, word files, etc.
+      store:
+        or:
+          - mime_group: web
+          - mime_group: archives
+          - mime_group: documents
+    handle:
+      store: store
+      # this makes it a recursive web crawler:
+      fetch: fetch
+
+  store:
+    # Store the crawled documents to a directory
+    method: directory
+    params:
+      path: /data/results
diff --git a/config/extended_web_scraper.yml b/config/extended_web_scraper.yml
new file mode 100644
index 0000000..198a4be
--- /dev/null
+++ b/config/extended_web_scraper.yml
@@ -0,0 +1,43 @@
+# Example scraper to demonstrate extending Memorious
+name: quote_scraper
+description: Scraper for quotes.toscrape.com
+# delay: 2
+pipeline:
+  init:
+    # The first stage logs in and creates an HTTP session which is used for subsequent requests.
+    method: example.quotes:login
+    params:
+      url: http://quotes.toscrape.com
+      username: fred
+      password: asdfasdf
+    handle:
+      pass: fetch
+  fetch:
+    # Download the page passed from the login stage.
+    method: fetch
+    params:
+      http_rate_limit: 60
+    handle:
+      pass: crawl
+  crawl:
+    # Crawl the HTML of the page passed in to extract specific things.
+    method: example.quotes:crawl
+    handle:
+      # If the 'fetch' rule is invoked, re-trigger the fetch stage
+      fetch: fetch
+      # If the 'cleanup' rule is invoked, delete the downloaded page from archive
+      cleanup: cleanup
+      # Otherwise, pass data on to the store stage
+      pass: store
+  store:
+    # Store structured data in a database (SQLite by default, or the one set via the MEMORIOUS_DATASTORE_URI environment variable).
+    method: example.quotes:store
+    params:
+      table: example_quotes
+  cleanup:
+    method: cleanup_archive
+aggregator:
+  method: example.quotes:export
+  params:
+    table: example_quotes
+    filename: all_quotes.json
diff --git a/config/simple_article_scraper.yml b/config/simple_article_scraper.yml
new file mode 100644
index 0000000..20f63ba
--- /dev/null
+++ b/config/simple_article_scraper.yml
@@ -0,0 +1,53 @@
+# Example scraper to demonstrate Memorious XPath narrowing
+name: occrp_entity_scraper
+description: A simple scrape of all the existing OCCRP investigations
+# Uncomment to run this scraper automatically:
+# schedule: weekly
+pipeline:
+  init:
+    # Start URL
+    method: seed
+    params:
+      urls:
+        - https://www.occrp.org/en/investigations
+    handle:
+      pass: fetch
+  fetch:
+    # Download the page passed from the seed stage.
+    method: fetch
+    params:
+      rules:
+        and:
+          - pattern: '.*investigations.*'
+          - domain: occrp.org
+          - not:
+              or:
+                - domain: vis.occrp.org
+                - domain: tech.occrp.org
+                - domain: data.occrp.org
+                - mime_group: assets
+                - mime_group: images
+                - pattern: "https://www.occrp.org/en/component/.*"
+                - pattern: "https://www.occrp.org/en/donate.*"
+                - pattern: "https://www.occrp.org/.*start=.*"
+                - pattern: "https://www.occrp.org/ru/.*"
+    handle:
+      pass: parse
+  parse:
+    method: example.article:parse
+    params:
+      schema: Article
+      store:
+        and:
+          - mime_group: web
+      properties:
+        title: .//meta[@property="og:title"]/@content
+        author: .//meta[@name="author"]/@content
+        publishedAt: .//*[@class="date"]/text()
+        description: .//meta[@property="og:description"]/@content
+    handle:
+      store: store
+      fetch: fetch
+  store:
+    # Store the crawled document as an ftm entity
+    method: aleph_emit_entity
\ No newline at end of file
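Note: the properties block above maps XPath expressions onto FollowTheMoney "Article" properties, which aleph_emit_entity then pushes to Aleph. As a rough illustration only of what such an entity looks like (it assumes the followthemoney package is available; the URL and values are placeholders, not project data):

    # Sketch: build the kind of FollowTheMoney entity the store stage emits.
    # Assumes `followthemoney` is installed; URL and values are placeholders.
    from followthemoney import model

    entity = model.make_entity("Article")
    entity.make_id("https://www.occrp.org/en/investigations/example")  # hypothetical URL
    entity.add("title", "Example investigation")
    entity.add("author", "Example Author")
    entity.add("publishedAt", "2020-01-01")
    print(entity.to_dict())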
diff --git a/config/simple_web_scraper.yml b/config/simple_web_scraper.yml
new file mode 100644
index 0000000..a1c02f5
--- /dev/null
+++ b/config/simple_web_scraper.yml
@@ -0,0 +1,60 @@
+# Scraper for the OCCRP web site.
+# The goal is not to download all HTML, but only PDFs & other documents
+# linked from the page as proof.
+name: occrp_web_site
+
+# A title for display in the UI:
+description: "Organized Crime and Corruption Reporting Project"
+
+pipeline:
+  init:
+    # This first stage will get the ball rolling with a seed URL.
+    method: seed
+    params:
+      urls:
+        - https://occrp.org
+    handle:
+      pass: fetch
+
+  fetch:
+    # Download the seed page
+    method: fetch
+    params:
+      # These rules specify which pages should be scraped or included:
+      rules:
+        and:
+          - domain: occrp.org
+          - not:
+              or:
+                - domain: vis.occrp.org
+                - domain: tech.occrp.org
+                - domain: data.occrp.org
+                - mime_group: assets
+                - mime_group: images
+                - pattern: "https://www.occrp.org/en/component/.*"
+                - pattern: "https://www.occrp.org/en/donate.*"
+                - pattern: "https://www.occrp.org/.*start=.*"
+                - pattern: "https://www.occrp.org/ru/.*"
+    handle:
+      pass: parse
+
+  parse:
+    # Parse the scraped pages to find if they contain additional links.
+    method: parse
+    params:
+      # Additional rules to determine if a scraped page should be stored or not.
+      # In this example, we're only keeping PDFs, word files, etc.
+      store:
+        or:
+          - mime_group: archives
+          - mime_group: documents
+    handle:
+      store: store
+      # this makes it a recursive web crawler:
+      fetch: fetch
+
+  store:
+    # Store the crawled documents to a directory
+    method: directory
+    params:
+      path: /data/results
diff --git a/config/simple_web_scraper_2.yml b/config/simple_web_scraper_2.yml
new file mode 100644
index 0000000..efc8fa0
--- /dev/null
+++ b/config/simple_web_scraper_2.yml
@@ -0,0 +1,42 @@
+# Example scraper to demonstrate Memorious XPath narrowing
+name: book_scraper
+description: Scraper for books.toscrape.com
+pipeline:
+  init:
+    # Start URL
+    method: seed
+    params:
+      urls:
+        - http://books.toscrape.com
+    handle:
+      pass: fetch
+  fetch:
+    # Download the page passed from the seed stage.
+    method: fetch
+    handle:
+      pass: parse
+  parse:
+    # Crawl the HTML of the page passed in to extract specific things.
+    method: parse
+    params:
+      # This only checks the <section> element for links to follow (effectively keeping only links to book pages and pagination, and skipping the sidebar which lists book categories).
+      include_paths:
+        - ".//section"
+      # This tells the parser to also extract additional metadata from the DOM, which is added to `data` and passed to the 'store' stage.
+      meta:
+        title: './/article[@class="product_page"]//h1'
+        price: './/article[@class="product_page"]//p[@class="price_color"]'
+      # It uses a regex rule to skip URLs with '/category/' in them, so it only stores the book pages and not the listings.
+      store:
+        not:
+          pattern: ".*/category/.*"
+    handle:
+      # If the 'fetch' rule is invoked, re-trigger the fetch stage
+      fetch: fetch
+      # Otherwise, pass data on to the store stage
+      store: store
+  store:
+    # Store the crawled documents to a directory
+    method: directory
+    params:
+      path: /data/results
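Note: the meta XPath expressions above are easy to sanity-check outside the crawler. A minimal sketch with lxml, assuming you have saved one book detail page locally (the file name is a placeholder):

    # Sketch: test the XPath expressions from simple_web_scraper_2.yml against
    # a saved copy of one book detail page (file name is hypothetical).
    from lxml import html

    page = html.parse("book_page.html")
    title = page.xpath('.//article[@class="product_page"]//h1')
    price = page.xpath('.//article[@class="product_page"]//p[@class="price_color"]')
    print(title[0].text_content() if title else None)
    print(price[0].text_content() if price else None)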
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..8232ea6
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,30 @@
+version: "2"
+
+services:
+  datastore:
+    image: postgres:11.4
+    volumes:
+      - "./build/datastore:/var/lib/postgresql/data"
+    environment:
+      POSTGRES_USER: datastore
+      POSTGRES_PASSWORD: datastore
+
+  redis:
+    image: redis:alpine
+    command: ["redis-server", "--appendonly", "yes"]
+    volumes:
+      - ./build/redis-data:/data
+
+  shell:
+    build: .
+    command: /bin/bash
+    links:
+      - redis
+      - datastore
+    volumes:
+      - "./build/data:/data"
+      - "./config:/crawlers/config"
+      - "./src:/crawlers/src"
+      - "./entities:/crawlers/entities"
+    tmpfs:
+      - "/tmp"
diff --git a/scripts/worker.sh b/scripts/worker.sh
new file mode 100644
index 0000000..6b2a824
--- /dev/null
+++ b/scripts/worker.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+pip3 install -q -e /crawlers
+
+# For debugging inside a container, attach a terminal and try:
+# python3 /tmp/debugpy --wait-for-client --listen 0.0.0.0:5678 memorious/cli.py --debug run book_scraper
+pip3 install debugpy -t /tmp
+/bin/bash
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..d60a352
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,16 @@
+from datetime import datetime
+from setuptools import setup, find_packages
+
+setup(
+    name="jodal-sources",
+    version=datetime.utcnow().date().isoformat(),
+    classifiers=[],
+    keywords="",
+    packages=find_packages("src"),
+    package_dir={"": "src"},
+    namespace_packages=[],
+    include_package_data=True,
+    zip_safe=False,
+    install_requires=["memorious", "datafreeze", "newspaper3k"],
+    entry_points={"memorious.plugins": ["example = example:init"]},
+)
diff --git a/src/example/__init__.py b/src/example/__init__.py
new file mode 100644
index 0000000..9835dc9
--- /dev/null
+++ b/src/example/__init__.py
@@ -0,0 +1,8 @@
+import os
+from memorious.core import manager
+
+
+def init():
+    file_path = os.path.dirname(__file__)
+    config_path = os.path.join(file_path, "..", "..", "config")
+    manager.load_path(config_path)
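Note: the "memorious.plugins" entry point in setup.py is what lets Memorious find example:init, which in turn loads the YAML files from config/. A rough way to confirm the plugin is registered after "pip3 install -q -e /crawlers" (sketch; assumes setuptools' pkg_resources is available in the image):

    # Sketch: list registered memorious plugins; expects "example -> example".
    import pkg_resources

    for ep in pkg_resources.iter_entry_points("memorious.plugins"):
        print(ep.name, "->", ep.module_name)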
diff --git a/src/example/article.py b/src/example/article.py
new file mode 100644
index 0000000..ad3f1e0
--- /dev/null
+++ b/src/example/article.py
@@ -0,0 +1,49 @@
+import logging
+import memorious.operations.parse
+import hashlib
+
+from newspaper import Article
+from memorious.helpers.rule import Rule
+
+log = logging.getLogger(__name__)
+
+
+def parse_article(context: object, data: dict, article: Article) -> None:
+    with context.http.rehash(data) as result:
+        if result.html is not None:
+            properties = context.params.get("properties")
+            data["schema"] = "Article"
+            data["entity_id"] = hashlib.md5(data["url"].encode("utf-8")).hexdigest()
+            data["properties"] = {
+                "title": result.html.xpath(properties["title"])
+                if properties.get("title")
+                else getattr(article, "title", None),
+                "description": result.html.xpath(properties.get("description"))
+                if properties.get("description")
+                else getattr(article, "description", None),
+                "author": result.html.xpath(properties.get("author"))
+                if properties.get("author")
+                else getattr(article, "authors", None),
+                "publishedAt": result.html.xpath(properties.get("publishedAt"))
+                if properties.get("publishedAt")
+                else getattr(article, "publish_date", None),
+                "bodyText": result.html.xpath(properties.get("bodyText"))
+                if properties.get("bodyText")
+                else getattr(article, "text", None),
+            }
+
+
+def parse(context, data):
+    with context.http.rehash(data) as result:
+        news_article = Article(url=data["url"])
+        news_article.download()
+        news_article.parse()
+        parse_article(context, data, news_article)
+
+        if result.html is not None:
+            memorious.operations.parse.parse_for_metadata(context, data, result.html)
+            memorious.operations.parse.parse_html(context, data, result)
+
+        rules = context.params.get("match") or {"match_all": {}}
+        if Rule.get_rule(rules).apply(result):
+            context.emit(rule="store", data=data)
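Note: when a property has no XPath configured in the YAML, parse_article falls back to what newspaper3k extracted. A standalone sketch of that fallback path (the URL is a placeholder, not part of the project):

    # Sketch of the newspaper3k attributes used as fallbacks above.
    # The URL is a placeholder; any article URL will do.
    from newspaper import Article

    article = Article("https://example.com/some-article")
    article.download()
    article.parse()
    print(article.title, article.authors, article.publish_date)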
diff --git a/src/example/quotes.py b/src/example/quotes.py
new file mode 100644
index 0000000..e8b896d
--- /dev/null
+++ b/src/example/quotes.py
@@ -0,0 +1,76 @@
+from urllib.parse import urljoin
+import datafreeze
+
+
+def login(context, data):
+    # Get parameters from the stage which calls this method in the yaml file
+    base_url = context.params.get("url")
+    url = urljoin(base_url, "login")
+    username = context.params.get("username")
+    password = context.params.get("password")
+
+    # Context wraps requests, and reuses the same session.
+    # When we login here, this is persisted across future uses of
+    # context.http
+    res = context.http.get(url)
+    # Get the login form and post the credentials.
+    # Uses lxml under the hood.
+    page = res.html
+    form = page.find(".//form")
+    login_url = urljoin(base_url, form.get("action"))
+    login_data = {"username": username, "password": password}
+    # We also need to pass the hidden inputs from the form.
+    hidden_inputs = {
+        h_in.get("name"): h_in.get("value")
+        for h_in in form.xpath('./input[@type="hidden"]')
+    }
+    login_data.update(hidden_inputs)
+    context.http.post(login_url, data=login_data)
+
+    # Set data for input to the next stage, and proceed.
+    # (The next stage is 'fetch' which takes a 'url' input.)
+    data = {"url": base_url}
+    context.emit(data=data)
+
+
+def crawl(context, data):
+    # This stage comes after 'fetch' so the 'data' input contains an
+    # HTTPResponse object.
+    response = context.http.rehash(data)
+    url = response.url
+    page = response.html
+
+    # If we find a next link, recursively fetch that page by handing it back
+    # to the 'fetch' stage.
+    next_link = page.find('.//nav//li[@class="next"]/a')
+    if next_link is not None:
+        next_url = urljoin(url, next_link.get("href"))
+        context.emit(rule="fetch", data={"url": next_url})
+
+    # Parse the rest of the page to extract structured data.
+    for quote in page.findall('.//div[@class="quote"]'):
+        quote_data = {
+            "text": quote.find('.//span[@class="text"]').text_content(),
+            "author": quote.find('.//small[@class="author"]').text_content(),
+            "tags": ", ".join(
+                [tag.text_content() for tag in quote.findall('.//a[@class="tag"]')]
+            ),  # noqa
+        }
+
+        # If 'rule' is not set, it defaults to 'pass', which triggers the
+        # final 'store' stage.
+        context.emit(data=quote_data)
+    context.emit(rule="cleanup", data={"content_hash": response.content_hash})
+
+
+def store(context, data):
+    # This example uses a database to store structured data, which you can
+    # access through context.datastore.
+    table = context.datastore[context.params.get("table")]
+    # The data is passed in from context.emit of the previous 'crawl' stage.
+    table.upsert(data, ["text", "author"])
+
+
+def export(context, params):
+    table = context.datastore[params["table"]]
+    datafreeze.freeze(table, format="json", filename=params["filename"])
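Note: the example_quotes table written by store() can also be inspected outside the crawler. The upsert/datafreeze calls above suggest context.datastore is backed by the dataset library, so a rough sketch (assuming the services from docker-compose.yml are running and MEMORIOUS_DATASTORE_URI is exported as in the Dockerfile) would be:

    # Sketch: read back a few rows written by the store stage.
    # Assumes the `dataset` library and a reachable datastore.
    import os
    import dataset

    db = dataset.connect(os.environ["MEMORIOUS_DATASTORE_URI"])
    for row in db["example_quotes"].find(_limit=5):
        print(row["author"], "-", row["text"])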