# Alineaid Automated Scraper #172 — workflow file for this run
# (captured from the GitHub Actions web UI).

# Scraper pipeline: on every push to main and once a day on a schedule, build
# the Cython modules, run the scraper, and commit/push the refreshed files.
#
# schedule.ctl holds the date (YYYY-MM-DD) of the last completed run. When it
# equals today's date every heavy step is skipped (`env.match == 'true'`).
# Because this workflow pushes to main itself, that guard is also what stops
# the push-triggered follow-up run from scraping and pushing again.
name: Alineaid Automated Scraper

on:
  push:
    branches:
      - main
  schedule:
    # GitHub evaluates cron expressions in UTC. Original note: "16 - 7 = 9",
    # presumably 16:00 local time at UTC+7 == 09:00 UTC — TODO confirm.
    - cron: "0 9 * * *"

jobs:
  auto_commit:
    runs-on: ubuntu-latest
    steps:
      - name: Set global directory
        run: git config --global --add safe.directory /github/workspace

      - uses: actions/checkout@v4
        with:
          # Credentials are not persisted; the push step supplies its own token.
          persist-credentials: false
          fetch-depth: 1

      - name: Read pipeline schedule date
        id: read_schedule
        run: |
          # A missing schedule.ctl (e.g. the very first run) is treated as
          # "never ran" instead of aborting the whole workflow.
          SCHEDULE_DATE=""
          if [ -f schedule.ctl ]; then
            SCHEDULE_DATE=$(cat schedule.ctl)
          fi
          echo "schedule_date=${SCHEDULE_DATE}" >> "$GITHUB_ENV"

      - name: Get current date
        id: get_date
        run: echo "current_date=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"

      - name: Check if dates match
        id: date_check
        run: |
          # match=true means the pipeline already completed today.
          if [ "$schedule_date" = "$current_date" ]; then
            echo "match=true" >> "$GITHUB_ENV"
          else
            echo "match=false" >> "$GITHUB_ENV"
          fi

      - name: Setup GPG for Chromium
        if: env.match != 'true'
        run: |
          # Register the Debian buster repositories, each bound to its own
          # keyring file via signed-by.
          sudo bash -c 'cat > /etc/apt/sources.list.d/debian.list <<EOF
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster.gpg] http://deb.debian.org/debian buster main
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-buster-updates.gpg] http://deb.debian.org/debian buster-updates main
          deb [arch=amd64 signed-by=/usr/share/keyrings/debian-security-buster.gpg] http://deb.debian.org/debian-security buster/updates main
          EOF'
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys DCC9EFBF77E11517
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 648ACFD622F3D138
          sudo apt-key adv --keyserver keyserver.ubuntu.com --recv-keys 112695A0E562B32A
          # gpg must run as root too: /usr/share/keyrings is not writable by
          # the unprivileged runner user.
          sudo apt-key export 77E11517 | sudo gpg --dearmour -o /usr/share/keyrings/debian-buster.gpg
          sudo apt-key export 22F3D138 | sudo gpg --dearmour -o /usr/share/keyrings/debian-buster-updates.gpg
          sudo apt-key export E562B32A | sudo gpg --dearmour -o /usr/share/keyrings/debian-security-buster.gpg
          # APT preference records must be separated by blank lines, otherwise
          # the three pins are read as a single record.
          sudo bash -c 'cat > /etc/apt/preferences.d/chromium.pref <<EOF
          Package: *
          Pin: release a=eoan
          Pin-Priority: 500

          Package: *
          Pin: origin "deb.debian.org"
          Pin-Priority: 300

          Package: chromium*
          Pin: origin "deb.debian.org"
          Pin-Priority: 700
          EOF'

      - name: Install Cython, Selenium, Chromium, etc.
        if: env.match != 'true'
        run: |
          sudo apt-get update && \
            sudo apt-get install -y \
              gcc python3-dev gnupg chromium chromium-driver
          pip install selenium cython bs4

      # Despite the step name, the package fetched here is google-chrome-stable.
      - name: Install Chromium v90.0.4430.72
        if: env.match != 'true'
        run: |
          # Download to an explicit path outside the workspace: the URL is
          # quoted (it contains '?' and '%'), no filename guessing via
          # `ls | grep` is needed, and the .deb can never be swept up by the
          # later `git add -A`.
          CHROME_DEB="${RUNNER_TEMP}/google-chrome-stable_90.0.4430.72_amd64.deb"
          wget -O "${CHROME_DEB}" "https://www.slimjet.com/chrome/download-chrome.php?file=files%2F90.0.4430.72%2Fgoogle-chrome-stable_current_amd64.deb"
          sudo dpkg -i "${CHROME_DEB}"
          rm -f "${CHROME_DEB}"
          google-chrome-stable --version

      - name: Build Cython Modules and Run Scraper
        if: env.match != 'true'
        run: |
          python setup.py build_ext --inplace
          python scraper.py \
            --iterations 1000 --workers 15
          # links.json is removed so it is not committed; -f tolerates the
          # scraper not having produced it.
          rm -f links.json

      - name: Set Pipeline Schedule
        if: env.match != 'true'
        # Record today's date so later runs on the same day are skipped.
        run: echo "$(date +'%Y-%m-%d')" > schedule.ctl

      - name: Commit changes
        if: env.match != 'true'
        run: |
          # NOTE(review): the email value below looks like a scrape-obfuscated
          # placeholder — replace it with the real committer address.
          git config --local user.email "[email protected]"
          git config --local user.name "belajarqywok"
          git add -A
          # Only commit when something is staged; a bare `git commit` exits
          # non-zero on an empty index and would fail the job.
          if git diff --cached --quiet; then
            echo "Nothing to commit."
          else
            git commit -m "Update Datasets"
          fi

      - name: Push changes
        if: env.match != 'true'
        # NOTE(review): tracking @master follows a moving branch; consider
        # pinning to a release tag or commit SHA.
        uses: ad-m/github-push-action@master
        with:
          github_token: ${{ secrets.GH_TOKEN }}
          branch: main