Skip to content

Scrape and Index Workflow Runs #876

Scrape and Index Workflow Runs

Scrape and Index Workflow Runs #876

Workflow file for this run

name: Scrape and Index Workflow Runs

# Triggers:
#   * schedule          - hourly, at minute 30.
#   * workflow_dispatch - manual run for one explicit repository/time window.
#
# The dispatch inputs are forwarded verbatim (via toJSON(inputs)) as a single
# matrix entry, so their names must match the matrix.* keys used by the jobs.
on:
  schedule:
    - cron: "30 * * * *"
  workflow_dispatch:
    inputs:
      # Passed to the index action as `artifact-name`.
      name:
        required: true
        type: string
      # Passed to the scraper as `--repository`.
      repository:
        required: true
        type: string
        default: "cilium/cilium"
      # Passed to the scraper as `--branch`.
      branch:
        required: true
        type: string
        default: main
      # Passed to the scraper as `--until`. Scheduled runs generate this value
      # with `date '+%Y-%m-%dT%H'`, e.g. 2024-01-31T13.
      until:
        required: true
        type: string
      # Passed to the scraper as `--since`; same format as `until`.
      since:
        required: true
        type: string
      # Passed to the scraper as `--events`.
      events:
        required: false
        type: string
        default: push
# Workflow-wide environment; TARGET_INDEX is handed to the scraper as its
# `--index` argument.
env:
  TARGET_INDEX: runs-oss

# Every `run:` step uses bash unless it overrides `shell` itself.
defaults:
  run:
    shell: bash
jobs:
  # Builds the matrix consumed by `scrape-and-index` and publishes it as the
  # `matrix` job output: a compact JSON document shaped like {"include": [...]}.
  # Exactly one of the "Prepare ..." steps runs, depending on the trigger, and
  # writes /tmp/matrix.json.
  setup-matrix:
    runs-on: ubuntu-22.04
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    steps:
      # Manual run: wrap the dispatch inputs into a one-entry matrix.
      - if: ${{ github.event_name == 'workflow_dispatch' }}
        name: Prepare workflow_dispatch inputs
        env:
          INPUTS_JSON: ${{ toJSON(inputs) }}
        run: |
          # The expansion is quoted so the shell neither word-splits nor
          # glob-expands the JSON before jq sees it.
          printf '%s\n' "$INPUTS_JSON" | jq -c '{ "include": [.] }' > /tmp/matrix.json

      # Scheduled run: only the file holding the scheduled matrix is needed.
      - if: ${{ github.event_name == 'schedule' }}
        name: Clone scheduled inputs
        uses: actions/checkout@v4
        with:
          sparse-checkout: |
            .github/scheduled.json

      # Scheduled run: stamp every entry of scheduled.json with a window that
      # ends at the current hour and starts three hours earlier.
      - if: ${{ github.event_name == 'schedule' }}
        name: Prepare scheduled inputs
        run: |
          # NOTE(review): `date` uses the runner's timezone - confirm this is
          # what the scraper expects.
          until_ts=$(date '+%Y-%m-%dT%H')
          since_ts=$(date -d '-3 hours' '+%Y-%m-%dT%H')
          # --arg hands the timestamps to jq as string variables instead of
          # splicing shell text into the jq program.
          jq -c --arg until_ts "$until_ts" --arg since_ts "$since_ts" \
            '.include |= map(. + {"until": $until_ts, "since": $since_ts})' \
            ./.github/scheduled.json > /tmp/matrix.json

      # Any other trigger has no way to produce a matrix; fail loudly.
      - if: ${{ github.event_name != 'schedule' && github.event_name != 'workflow_dispatch' }}
        name: Reject unsupported trigger
        run: |
          echo "::error::Unsupported event '${GITHUB_EVENT_NAME}': cannot build a matrix."
          exit 1

      - id: set-matrix
        name: Publish matrix
        run: |
          cat /tmp/matrix.json
          # matrix.json is a single line (jq -c), so the simple name=value
          # output form is sufficient.
          echo "matrix=$(cat /tmp/matrix.json)" >> "$GITHUB_OUTPUT"

  # Runs the scraper once per matrix entry and ships the result to OpenSearch.
  scrape-and-index:
    needs: setup-matrix
    strategy:
      # GitHub API doesn't like concurrent requests.
      # Additionally, since our token is limited in the
      # number of requests we can make, just do one
      # repository at a time. If it takes longer that's ok
      # since it reduces the chance we run into a limit.
      max-parallel: 1
      # Scrape jobs are independent, therefore one job's failure
      # shouldn't impact another's.
      fail-fast: false
      matrix: ${{ fromJSON(needs.setup-matrix.outputs.matrix) }}
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4

      - name: Echo Matrix
        env:
          MATRIX_JSON: ${{ toJSON(matrix) }}
        run: |
          # Printed from an environment variable so quotes inside the JSON
          # cannot terminate a shell string.
          printf '%s\n' "$MATRIX_JSON"

      - name: Scrape Workflow Runs
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Matrix values originate from dispatch inputs or scheduled.json.
          # They are passed through the environment and expanded quoted, so
          # they are always treated as data and never as shell syntax.
          SCRAPE_REPOSITORY: ${{ matrix.repository }}
          SCRAPE_BRANCH: ${{ matrix.branch }}
          SCRAPE_UNTIL: ${{ matrix.until }}
          SCRAPE_SINCE: ${{ matrix.since }}
          SCRAPE_EVENTS: ${{ matrix.events }}
        run: |
          # TARGET_INDEX comes from the workflow-level `env` block.
          go run . workflow runs \
            --repository "$SCRAPE_REPOSITORY" \
            --branch "$SCRAPE_BRANCH" \
            --until "$SCRAPE_UNTIL" \
            --since "$SCRAPE_SINCE" \
            --events "$SCRAPE_EVENTS" \
            --index "$TARGET_INDEX" \
            --verbose > ./results.json

      - name: Export to OpenSearch
        uses: ./.github/actions/index
        with:
          artifact-name: ${{ matrix.name }}
          bulk-file: ./results.json
          temp-dir: /tmp
          opensearch-url: ${{ secrets.OPENSEARCH_URL }}
          opensearch-user: ${{ secrets.OPENSEARCH_USER }}
          opensearch-pass: ${{ secrets.OPENSEARCH_PASS }}