forked from ArchiveBox/ArchiveBox
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdocker-compose.yml
140 lines (119 loc) · 6.81 KB
/
docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# Usage:
# docker compose run archivebox init --setup
# docker compose up
# echo "https://example.com" | docker compose run archivebox archivebox add
# docker compose run archivebox add --depth=1 https://example.com/some/feed.rss
# docker compose run archivebox config --set MEDIA_MAX_SIZE=750m
# docker compose run archivebox help
# Documentation:
# https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker-compose
version: '3.9'
services:
archivebox:
image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
command: server --quick-init 0.0.0.0:8000
ports:
- 8000:8000
volumes:
- ./data:/data
# - ./etc/crontabs:/var/spool/cron/crontabs # uncomment this and archivebox_scheduler below to set up automatic recurring archive jobs
# - ./archivebox:/app/archivebox # uncomment this to mount the ArchiveBox source code at runtime (for developers working on archivebox)
# build: . # uncomment this to build the image from source code at buildtime (for developers working on archivebox)
# dns: # uncomment this and pihole container below for ad-blocking during archiving
# - pihole
environment:
- ALLOWED_HOSTS=* # restrict this to only accept incoming traffic via specific domain name
# - MEDIA_MAX_SIZE=750m # increase this filesize limit to allow archiving larger audio/video files
# - TIMEOUT=60 # increase this number to 120+ seconds if you see many slow downloads timing out
# - CHECK_SSL_VALIDITY=True # set to False to disable strict SSL checking (allows saving URLs w/ broken certs)
# - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting all URLs to Archive.org when archiving
# - PUBLIC_INDEX=True # set to False to prevent anonymous users from viewing snapshot list
# - PUBLIC_SNAPSHOTS=True # set to False to prevent anonymous users from viewing snapshot content
# - PUBLIC_ADD_VIEW=False # set to True to allow anonymous users to submit new URLs to archive
# - PUID=1000 # set to your host user's UID & GID if you encounter permissions issues
# - PGID=1000
# - SEARCH_BACKEND_ENGINE=sonic # uncomment these and sonic container below for better full-text search
# - SEARCH_BACKEND_HOST_NAME=sonic
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# ...
# add further configuration options from archivebox/config.py as needed (to apply them only to this container)
# or set using `docker compose run archivebox config --set SOME_KEY=someval` (to persist config across all containers)
######## Optional Addons: tweak examples below as needed for your specific use case ########
### Example: To run the Sonic full-text search backend, first download the config file to sonic.cfg
# $ curl -O https://raw.githubusercontent.com/ArchiveBox/ArchiveBox/master/etc/sonic.cfg
# After starting, backfill any existing Snapshots into the full-text index:
# $ docker-compose run archivebox update --index-only
# sonic:
# image: valeriansaliou/sonic:latest
# expose:
# - 1491
# environment:
# - SEARCH_BACKEND_PASSWORD=SomeSecretPassword
# volumes:
# - ./sonic.cfg:/etc/sonic.cfg:ro
# - ./data/sonic:/var/lib/sonic/store
### Example: To run pihole in order to block ad/tracker requests during archiving,
# uncomment this block and set up pihole using its admin interface
# pihole:
# image: pihole/pihole:latest
# ports:
# - 8090:80 # uncomment to access the admin HTTP interface on http://localhost:8090
# environment:
# - WEBPASSWORD=SET_THIS_TO_SOME_SECRET_PASSWORD_FOR_ADMIN_DASHBOARD
# volumes:
# - ./etc/pihole:/etc/pihole
# - ./etc/dnsmasq:/etc/dnsmasq.d
### Example: Enable ability to run regularly scheduled archiving tasks by uncommenting this container
# $ docker compose run archivebox schedule --every=day --depth=1 'https://example.com/some/rss/feed.xml'
# then restart the scheduler container to apply the changes to the schedule
# $ docker compose restart archivebox_scheduler
# archivebox_scheduler:
# image: ${DOCKER_IMAGE:-archivebox/archivebox:dev}
# command: schedule --foreground
# environment:
# - MEDIA_MAX_SIZE=750m # increase this number to allow archiving larger audio/video files
# # - TIMEOUT=60 # increase if you see timeouts often during archiving / on slow networks
# # - ONLY_NEW=True # set to False to retry previously failed URLs when re-adding instead of skipping them
# # - CHECK_SSL_VALIDITY=True # set to False to allow saving URLs w/ broken SSL certs
# # - SAVE_ARCHIVE_DOT_ORG=True # set to False to disable submitting URLs to Archive.org when archiving
# # - PUID=502 # set to your host user's UID & GID if you encounter permissions issues
# # - PGID=20
# volumes:
# - ./data:/data
# - ./etc/crontabs:/var/spool/cron/crontabs
# # cpus: 2 # uncomment / edit these values to limit container resource consumption
# # mem_limit: 2048m
# # shm_size: 1024m
### Example: Put Nginx in front of the ArchiveBox server for SSL termination
# nginx:
# image: nginx:alpine
# ports:
# - 443:443
# - 80:80
# volumes:
# - ./etc/nginx.conf:/etc/nginx/nginx.conf
# - ./data:/var/www
### Example: run all your ArchiveBox traffic through a WireGuard VPN tunnel
# wireguard:
# image: linuxserver/wireguard:latest
# network_mode: 'service:archivebox'
# cap_add:
# - NET_ADMIN
# - SYS_MODULE
# sysctls:
# - net.ipv4.conf.all.rp_filter=2
# - net.ipv4.conf.all.src_valid_mark=1
# volumes:
# - /lib/modules:/lib/modules
# - ./wireguard.conf:/config/wg0.conf:ro
### Example: Run PYWB in parallel and auto-import WARCs from ArchiveBox
# pywb:
# image: webrecorder/pywb:latest
# entrypoint: /bin/sh -c '(wb-manager init default || test $$? -eq 2) && wb-manager add default /archivebox/archive/*/warc/*.warc.gz; wayback;'
# environment:
# - INIT_COLLECTION=archivebox
# ports:
# - 8080:8080
# volumes:
# - ./data:/archivebox
# - ./data/wayback:/webarchive