# Alineaid Automated Scraper #172
# NOTE: the page this file was copied from warned that it contains bidirectional
# Unicode text that may be interpreted or compiled differently than it appears.
# To review, open the file in an editor that reveals hidden Unicode characters.
# Builds the Cython modules, runs scraper.py and commits whatever changed back
# to `main`.
#
# `schedule.ctl` holds the date written by the last run that got as far as the
# "Set Pipeline Schedule" step. Every step after the date check is skipped when
# that date equals today's date, so the scraper does real work at most once per
# day even though the workflow fires on both `push` and `schedule`.
name: Alineaid Automated Scraper

on:
  push:
    branches:
      - main
  schedule:
    # GitHub evaluates cron expressions in UTC. Original note: "16 - 7 = 9",
    # presumably 16:00 local time at UTC+7 -> 09:00 UTC (confirm the timezone).
    - cron: "0 9 * * *"

jobs:
  auto_commit:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): /github/workspace looks like a container workspace path;
      # confirm it matches the runner's checkout directory or is still needed.
      - name: Set global directory
        run: git config --global --add safe.directory /github/workspace

      # persist-credentials is off because the final push authenticates with
      # the GH_TOKEN secret rather than the checkout token.
      - uses: actions/checkout@v4
        with:
          persist-credentials: false
          fetch-depth: 1

      # Exports `schedule_date` to later steps. A missing schedule.ctl is
      # treated as "never ran" instead of failing the job, and whitespace is
      # stripped so the value is always a single token in $GITHUB_ENV.
      - name: Read pipeline schedule date
        id: read_schedule
        run: |
          if [ -f schedule.ctl ]; then
            SCHEDULE_DATE="$(tr -d '[:space:]' < schedule.ctl)"
          else
            SCHEDULE_DATE=""
          fi
          echo "schedule_date=${SCHEDULE_DATE}" >> "$GITHUB_ENV"

      # Exports `current_date` (YYYY-MM-DD in the runner's timezone).
      - name: Get current date
        id: get_date
        run: echo "current_date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      # Exports `match`: 'true' when the scraper already ran today.
      - name: Check if dates match
        id: date_check
        run: |
          if [ "$schedule_date" = "$current_date" ]; then
            echo "match=true" >> "$GITHUB_ENV"
          else
            echo "match=false" >> "$GITHUB_ENV"
          fi

      # Adds the Debian buster repositories (with their signing keys) and pins
      # chromium* to them so apt prefers the Debian chromium packages.
      # NOTE(review): buster and the `eoan` pin are both end-of-life releases;
      # confirm these sources still resolve on the current runner image.
      - name: Setup GPG for Chromium
        if: env.match != 'true'
        run: |
          sudo tee /etc/apt/sources.list.d/debian.list > /dev/null <<'EOF'
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
          EOF
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
          # gpg needs sudo as well: /usr/share/keyrings is not writable by the
          # unprivileged runner user.
          sudo apt-key export 77E11517 | sudo gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
          sudo apt-key export 22F3D138 | sudo gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
          sudo apt-key export E562B32A | sudo gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg
          # APT preference records must be separated by blank lines.
          sudo tee /etc/apt/preferences.d/chromium.pref > /dev/null <<'EOF'
          Package: *
          Pin: release a=eoan
          Pin-Priority: 500

          Package: *
          Pin: origin "deb.debian.org"
          Pin-Priority: 300

          Package: chromium*
          Pin: origin "deb.debian.org"
          Pin-Priority: 700
          EOF

      - name: Install Cython, Selenium, Chromium, etc.
        if: env.match != 'true'
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            gcc python3-dev gnupg chromium chromium-driver
          pip install selenium cython bs4

      # Downloads to an explicit filename: the URL is quoted because it
      # contains `?`, and the package is removed afterwards so the later
      # `git add -A` does not commit it.
      - name: Install Chromium v90.0.4430.72
        if: env.match != 'true'
        run: |
          wget -O google-chrome-stable.deb \
            "https://www.slimjet.com/chrome/download-chrome.php?file=files%2F90.0.4430.72%2Fgoogle-chrome-stable_current_amd64.deb"
          sudo dpkg -i google-chrome-stable.deb
          rm google-chrome-stable.deb
          google-chrome-stable --version

      # links.json is deleted before the commit step; -f keeps the step green
      # when the scraper did not produce the file.
      - name: Build Cython Modules and Run Scraper
        if: env.match != 'true'
        run: |
          python setup.py build_ext --inplace
          python scraper.py \
            --iterations 1000 --workers 15
          rm -f links.json

      # Records today's date so later runs on the same day are skipped.
      - name: Set Pipeline Schedule
        if: env.match != 'true'
        run: date +'%Y-%m-%d' > schedule.ctl

      # NOTE(review): the e-mail below looks like redaction residue from the
      # page this file was copied from; restore the real address.
      - name: Commit changes
        if: env.match != 'true'
        run: |
          git config --local user.email "[email protected]"
          git config --local user.name "belajarqywok"
          git add -A
          # Skip the commit when nothing is staged instead of failing the job.
          git diff --cached --quiet || git commit -m "Update Datasets"

      # Pinned to a release tag rather than the moving `master` branch.
      - name: Push changes
        if: env.match != 'true'
        uses: ad-m/github-push-action@v0.8.0
        with:
          github_token: ${{ secrets.GH_TOKEN }}
          branch: main