diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..659d6e5
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,12 @@
+# syntax=docker/dockerfile:1
+
+FROM python:3.11.5
+
+WORKDIR /app
+
+COPY requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+
+COPY app app
+
+CMD [ "python", "-m", "streamlit", "run", "app/main.py" ]
diff --git a/README.md b/README.md
index faa0cff..986a2b9 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,27 @@
-# Hack4Good - OECD
+# Hack4Good - NLP for policy trend analysis (OECD)
[](pyproject.toml)
[](.pre-commit-config.yaml)
-* https://hackmd.io/g5AYgepnQrqrk4V26DrQMg
+This project was created for the [Hack4Good 2023](https://www.analytics-club.org/hack4good) hackathon in collaboration with [OECD](https://www.oecd.org/switzerland/).
-* https://docs.google.com/spreadsheets/d/1pKb_1Je4hD2X8IfYrFXYqBhWfPg5lgPY/edit?usp=sharing&ouid=110500414719598262605&rtpof=true&sd=true
+## GUI Quickstart
-## Getting started
+Environment variables need to be set in order to run the code.
+Create a `.env` file in the root of the repo (you can use `cp .env.default .env`) and set the following variables:
-### Conda Environment
-To run the code in this repo create a [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) environment with the required dependencies:
+| Environment Variable | Description |
+| --- | --- |
+| `ADOBE_CLIENT_ID` | Create an Adobe Developer account and select "Get credentials" [here](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/) |
+| `ADOBE_CLIENT_SECRET` | Copy it from the same "Get credentials" page [here](https://developer.adobe.com/document-services/docs/overview/pdf-extract-api/) as `ADOBE_CLIENT_ID` |
+| `OPENAI_API_KEY` | Your [OpenAI API key](https://help.openai.com/en/articles/4936850-where-do-i-find-my-api-key) |
+
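+For example, a minimal `.env` could look like this (the values below are placeholders, not real credentials):
+
+```
+ADOBE_CLIENT_ID=<your-adobe-client-id>
+ADOBE_CLIENT_SECRET=<your-adobe-client-secret>
+OPENAI_API_KEY=<your-openai-api-key>
+```
+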
+After setting the environment variables, you can run the code in one of two ways:
+
+### Conda Environment
+
+1. Create a [conda](https://conda.io/projects/conda/en/latest/user-guide/install/index.html) environment with the required dependencies:
To **create** a conda environment after cloning the repo:
```
@@ -22,7 +33,36 @@ conda activate hack4good
conda deactivate
```
+(Optional) To **update** the conda environment after pulling latest changes:
+```
+conda activate hack4good
+conda env update -f environment.yml --prune
+```
+
+(Optional) To **remove** the conda environment:
+```
+conda deactivate
+conda env remove -n hack4good
+```
+
+2. Run the Streamlit app:
+```
+python -m streamlit run app/main.py
+```
+
+### Docker
+
+1. Pull (or build) the Docker image
+
+To **pull** the latest Docker image:
+```
+docker pull ghc
+```
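+
+Alternatively, to **build** the image locally from the included `Dockerfile` and run it (the `hack4good` image name and the port mapping, Streamlit's default 8501, are only suggestions):
+```
+docker build -t hack4good .
+docker run --env-file .env -p 8501:8501 hack4good
+```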
+
+
## Contributing
1. Install [pre-commit](https://pre-commit.com/#installation).
-2. `pre-commit install`
-3. Add changes, commit and pull request to `main` branch.
+2. Run `pre-commit install` to set up the repo's pre-commit hooks in your local clone.
+3. Add your changes, commit, and create a pull request targeting the `main` branch.
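+4. If you change the Python dependencies, edit `requirements.in` and regenerate the pinned `requirements.txt`. The file header shows it was produced with [pip-compile](https://github.com/jazzband/pip-tools), so a workflow along these lines should work:
+```
+pip install pip-tools
+pip-compile --resolver=backtracking requirements.in
+```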
diff --git a/environment.yml b/environment.yml
index 28efbaf..aafc347 100644
--- a/environment.yml
+++ b/environment.yml
@@ -5,13 +5,4 @@ dependencies:
- pip=23.2.1
- python=3.11.5
- pip:
- - pdfplumber
- - pdfminer.six
- - tqdm
- - torch
- - nougat-ocr
- - streamlit
- - langchain
- - python-dotenv
- - openai
- - python-multipart
+ - -r requirements.txt
diff --git a/requirements.in b/requirements.in
new file mode 100644
index 0000000..3c941d2
--- /dev/null
+++ b/requirements.in
@@ -0,0 +1,12 @@
+pdfplumber
+pdfminer.six
+tqdm
+torch
+nougat-ocr
+streamlit
+langchain
+python-dotenv
+openai
+python-multipart
+pdfservices-sdk
+requests
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..75c0fd8
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,469 @@
+#
+# This file is autogenerated by pip-compile with Python 3.11
+# by the following command:
+#
+# pip-compile --resolver=backtracking requirements.in
+#
+aiohttp==3.9.1
+ # via
+ # datasets
+ # fsspec
+ # langchain
+aiosignal==1.3.1
+ # via aiohttp
+albumentations==1.3.1
+ # via nougat-ocr
+altair==5.2.0
+ # via streamlit
+annotated-types==0.6.0
+ # via pydantic
+anyio==3.7.1
+ # via
+ # httpx
+ # langchain
+ # openai
+attrs==23.2.0
+ # via
+ # aiohttp
+ # jsonschema
+ # referencing
+blinker==1.7.0
+ # via streamlit
+build==0.9.0
+ # via pdfservices-sdk
+cachetools==5.3.2
+ # via streamlit
+certifi==2022.12.7
+ # via
+ # httpcore
+ # httpx
+ # pdfservices-sdk
+ # requests
+cffi==1.15.1
+ # via
+ # cryptography
+ # pdfservices-sdk
+chardet==5.2.0
+ # via pdfminer-six
+charset-normalizer==2.0.12
+ # via
+ # pdfminer-six
+ # requests
+click==8.1.7
+ # via
+ # nltk
+ # streamlit
+cryptography==3.4.6
+ # via
+ # pdfminer-six
+ # pdfservices-sdk
+dataclasses-json==0.6.3
+ # via langchain
+datasets[vision]==2.16.1
+ # via nougat-ocr
+dill==0.3.7
+ # via
+ # datasets
+ # multiprocess
+distro==1.9.0
+ # via openai
+filelock==3.13.1
+ # via
+ # datasets
+ # huggingface-hub
+ # torch
+ # transformers
+frozenlist==1.4.1
+ # via
+ # aiohttp
+ # aiosignal
+fsspec[http]==2023.10.0
+ # via
+ # datasets
+ # huggingface-hub
+ # lightning
+ # pytorch-lightning
+ # torch
+gitdb==4.0.11
+ # via gitpython
+gitpython==3.1.40
+ # via streamlit
+h11==0.14.0
+ # via httpcore
+httpcore==1.0.2
+ # via httpx
+httpx==0.26.0
+ # via openai
+huggingface-hub==0.20.2
+ # via
+ # datasets
+ # tokenizers
+ # transformers
+idna==3.6
+ # via
+ # anyio
+ # httpx
+ # requests
+ # yarl
+imageio==2.33.1
+ # via scikit-image
+importlib-metadata==6.11.0
+ # via streamlit
+jinja2==3.1.2
+ # via
+ # altair
+ # pydeck
+ # torch
+joblib==1.3.2
+ # via
+ # nltk
+ # scikit-learn
+jsonpatch==1.33
+ # via
+ # langchain
+ # langchain-core
+jsonpointer==2.4
+ # via jsonpatch
+jsonschema==4.20.0
+ # via altair
+jsonschema-specifications==2023.12.1
+ # via jsonschema
+langchain==0.0.347
+ # via -r requirements.in
+langchain-core==0.0.11
+ # via langchain
+langsmith==0.0.79
+ # via
+ # langchain
+ # langchain-core
+lazy-loader==0.3
+ # via scikit-image
+levenshtein==0.23.0
+ # via python-levenshtein
+lightning==2.1.3
+ # via nougat-ocr
+lightning-utilities==0.10.0
+ # via
+ # lightning
+ # pytorch-lightning
+ # torchmetrics
+markdown-it-py==3.0.0
+ # via rich
+markupsafe==2.1.3
+ # via jinja2
+marshmallow==3.20.2
+ # via dataclasses-json
+mdurl==0.1.2
+ # via markdown-it-py
+mpmath==1.3.0
+ # via sympy
+multidict==6.0.4
+ # via
+ # aiohttp
+ # yarl
+multipart==0.2.4
+ # via pdfservices-sdk
+multiprocess==0.70.15
+ # via datasets
+munch==4.0.0
+ # via sconf
+mypy-extensions==1.0.0
+ # via typing-inspect
+networkx==3.2.1
+ # via
+ # scikit-image
+ # torch
+nltk==3.8.1
+ # via nougat-ocr
+nougat-ocr==0.1.17
+ # via -r requirements.in
+numpy==1.26.3
+ # via
+ # albumentations
+ # altair
+ # datasets
+ # imageio
+ # langchain
+ # lightning
+ # opencv-python-headless
+ # pandas
+ # pyarrow
+ # pydeck
+ # pytorch-lightning
+ # qudida
+ # scikit-image
+ # scikit-learn
+ # scipy
+ # streamlit
+ # tifffile
+ # torchmetrics
+ # torchvision
+ # transformers
+openai==1.7.0
+ # via -r requirements.in
+opencv-python-headless==4.9.0.80
+ # via
+ # albumentations
+ # nougat-ocr
+ # qudida
+orjson==3.9.10
+ # via nougat-ocr
+packaging==21.3
+ # via
+ # altair
+ # build
+ # datasets
+ # huggingface-hub
+ # lightning
+ # lightning-utilities
+ # marshmallow
+ # pdfservices-sdk
+ # pytorch-lightning
+ # scikit-image
+ # streamlit
+ # torchmetrics
+ # transformers
+pandas==2.1.4
+ # via
+ # altair
+ # datasets
+ # streamlit
+pdfminer-six==20220319
+ # via
+ # -r requirements.in
+ # pdfplumber
+pdfplumber==0.6.2
+ # via -r requirements.in
+pdfservices-sdk==2.3.0
+ # via -r requirements.in
+pep517==0.13.0
+ # via
+ # build
+ # pdfservices-sdk
+pillow==10.2.0
+ # via
+ # datasets
+ # imageio
+ # pdfplumber
+ # scikit-image
+ # streamlit
+ # torchvision
+polling==0.3.2
+ # via pdfservices-sdk
+polling2==0.5.0
+ # via pdfservices-sdk
+protobuf==4.25.1
+ # via streamlit
+pyarrow==14.0.2
+ # via
+ # datasets
+ # streamlit
+pyarrow-hotfix==0.6
+ # via datasets
+pycparser==2.21
+ # via
+ # cffi
+ # pdfservices-sdk
+pydantic==2.5.3
+ # via
+ # langchain
+ # langchain-core
+ # langsmith
+ # openai
+pydantic-core==2.14.6
+ # via pydantic
+pydeck==0.8.1b0
+ # via streamlit
+pygments==2.14.0
+ # via
+ # pdfservices-sdk
+ # rich
+pyjwt==2.4.0
+ # via pdfservices-sdk
+pyparsing==3.0.9
+ # via
+ # packaging
+ # pdfservices-sdk
+pypdf==3.17.4
+ # via nougat-ocr
+pypdfium2==4.25.0
+ # via
+ # nougat-ocr
+ # pdfplumber
+python-dateutil==2.8.2
+ # via
+ # pandas
+ # streamlit
+python-dotenv==1.0.0
+ # via -r requirements.in
+python-levenshtein==0.23.0
+ # via nougat-ocr
+python-multipart==0.0.6
+ # via -r requirements.in
+pytorch-lightning==2.1.3
+ # via lightning
+pytz==2023.3.post1
+ # via pandas
+pyyaml==6.0
+ # via
+ # albumentations
+ # datasets
+ # huggingface-hub
+ # langchain
+ # lightning
+ # pdfservices-sdk
+ # pytorch-lightning
+ # transformers
+qudida==0.0.4
+ # via albumentations
+rapidfuzz==3.6.1
+ # via levenshtein
+referencing==0.32.1
+ # via
+ # jsonschema
+ # jsonschema-specifications
+regex==2023.12.25
+ # via
+ # nltk
+ # transformers
+requests==2.27.1
+ # via
+ # -r requirements.in
+ # datasets
+ # fsspec
+ # huggingface-hub
+ # langchain
+ # langsmith
+ # pdfservices-sdk
+ # requests-toolbelt
+ # streamlit
+ # torchvision
+ # transformers
+requests-toolbelt==0.10.1
+ # via pdfservices-sdk
+rich==13.7.0
+ # via streamlit
+rpds-py==0.16.2
+ # via
+ # jsonschema
+ # referencing
+ruamel-yaml==0.18.5
+ # via sconf
+ruamel-yaml-clib==0.2.8
+ # via ruamel-yaml
+safetensors==0.4.1
+ # via transformers
+scikit-image==0.22.0
+ # via albumentations
+scikit-learn==1.3.2
+ # via qudida
+scipy==1.11.4
+ # via
+ # albumentations
+ # scikit-image
+ # scikit-learn
+sconf==0.2.5
+ # via nougat-ocr
+sentencepiece==0.1.99
+ # via nougat-ocr
+six==1.16.0
+ # via
+ # pdfservices-sdk
+ # python-dateutil
+smmap==5.0.1
+ # via gitdb
+sniffio==1.3.0
+ # via
+ # anyio
+ # httpx
+ # openai
+sqlalchemy==2.0.25
+ # via langchain
+streamlit==1.29.0
+ # via -r requirements.in
+sympy==1.12
+ # via torch
+tenacity==8.2.3
+ # via
+ # langchain
+ # langchain-core
+ # streamlit
+threadpoolctl==3.2.0
+ # via scikit-learn
+tifffile==2023.12.9
+ # via scikit-image
+timm==0.5.4
+ # via nougat-ocr
+tokenizers==0.15.0
+ # via transformers
+toml==0.10.2
+ # via
+ # pdfservices-sdk
+ # streamlit
+toolz==0.12.0
+ # via altair
+torch==2.1.2
+ # via
+ # -r requirements.in
+ # lightning
+ # pytorch-lightning
+ # timm
+ # torchmetrics
+ # torchvision
+torchmetrics==1.2.1
+ # via
+ # lightning
+ # pytorch-lightning
+torchvision==0.16.2
+ # via timm
+tornado==6.4
+ # via streamlit
+tqdm==4.66.1
+ # via
+ # -r requirements.in
+ # datasets
+ # huggingface-hub
+ # lightning
+ # nltk
+ # openai
+ # pytorch-lightning
+ # transformers
+transformers==4.36.2
+ # via nougat-ocr
+typing-extensions==4.9.0
+ # via
+ # huggingface-hub
+ # lightning
+ # lightning-utilities
+ # openai
+ # pydantic
+ # pydantic-core
+ # pytorch-lightning
+ # qudida
+ # sqlalchemy
+ # streamlit
+ # torch
+ # typing-inspect
+typing-inspect==0.9.0
+ # via dataclasses-json
+tzdata==2023.4
+ # via pandas
+tzlocal==5.2
+ # via streamlit
+urllib3==1.26.13
+ # via
+ # pdfservices-sdk
+ # requests
+validators==0.22.0
+ # via streamlit
+wand==0.6.13
+ # via pdfplumber
+xxhash==3.4.1
+ # via datasets
+yarl==1.9.4
+ # via aiohttp
+zipp==3.17.0
+ # via importlib-metadata
+
+# The following packages are considered to be unsafe in a requirements file:
+# setuptools