Skip to content

Daily Data Sync

Daily Data Sync #1118

# Workflow header: display name, triggers, and environment shared by every job.
name: "Daily Data Sync"

on:
  # allow for kicking off data syncs manually
  workflow_dispatch:
  # run midnight (UTC) daily
  schedule:
    - cron: "0 0 * * *"

env:
  CGO_ENABLED: "0"
  # Kept as a quoted string (like CGO_ENABLED above): the Slack steps compare
  # env.SLACK_NOTIFICATIONS against the string 'true', so the value should not
  # depend on how a YAML boolean is coerced.
  SLACK_NOTIFICATIONS: "true"
jobs:
# Resolves the configured provider list once and publishes it as job outputs:
#   providers           - the raw list printed by `make show-providers`
#   multicore-providers - the subset routed to the multicore update job
#   other-providers     - everything else
discover-providers:
  name: "Discover vulnerability providers"
  runs-on: runs-on=${{ github.run_id }}/runner=small
  if: github.repository == 'anchore/grype-db' # only run for main repo
  permissions:
    contents: read
    packages: read
  outputs:
    providers: ${{ steps.read-providers.outputs.providers }}
    multicore-providers: ${{ steps.split-providers.outputs.multicore-providers }}
    other-providers: ${{ steps.split-providers.outputs.other-providers }}
  steps:
    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 #v6.0.1
      with:
        persist-credentials: false
    - name: Bootstrap environment
      uses: ./.github/actions/bootstrap
      with:
        python: false
    - name: Login to ghcr.io
      run: make ci-oras-ghcr-login
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_USERNAME: ${{ github.actor }}
    - name: Read configured providers
      id: read-providers
      # TODO: honor CI overrides
      # NOTE(review): assumes `make show-providers` prints a single-line JSON
      # array (it is parsed with jq and fromJson downstream) - confirm.
      run: |
        content="$(make show-providers)"
        echo "providers=$content" >> "$GITHUB_OUTPUT"
    - name: Split providers by concurrency needs
      id: split-providers
      env:
        # Passed through the environment rather than interpolated into the
        # script text, so quotes in the value cannot break or alter the shell
        # command (same pattern as PROVIDER in the update jobs).
        ALL_PROVIDERS: ${{ steps.read-providers.outputs.providers }}
      run: |
        # "ubuntu" is the only provider sent to the multicore job; all other
        # providers go to the regular update job.
        multicore_providers="$(printf '%s\n' "$ALL_PROVIDERS" | jq -c '[.[] | select(. == "ubuntu")]')"
        other_providers="$(printf '%s\n' "$ALL_PROVIDERS" | jq -c '[.[] | select(. != "ubuntu")]')"
        echo "multicore-providers=$multicore_providers" >> "$GITHUB_OUTPUT"
        echo "other-providers=$other_providers" >> "$GITHUB_OUTPUT"
# Refreshes the cache of each provider in the "multicore" group (one matrix
# entry per provider) on a 32-CPU runner, then uploads the resulting state.
update-provider-multicore:
  name: "Update provider (multicore)"
  needs: discover-providers
  # A matrix with no values is rejected by GitHub as a job error; skip the job
  # instead when discovery produced an empty multicore list (jq -c prints "[]").
  if: ${{ needs.discover-providers.outputs.multicore-providers != '[]' }}
  runs-on: runs-on=${{ github.run_id }}-multicore-${{ strategy.job-index }}/cpu=32/volume=80gb:gp3/family=r8+m8+r7+r6i+r6a+m7+m6i+m6a
  timeout-minutes: 480
  # set the permissions granted to the github token to publish to ghcr.io
  permissions:
    contents: read
    packages: write
  strategy:
    matrix:
      provider: ${{fromJson(needs.discover-providers.outputs.multicore-providers)}}
    # one provider failing must not cancel the others
    fail-fast: false
  steps:
    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 #v6.0.1
      with:
        persist-credentials: false
    - name: Bootstrap environment
      uses: ./.github/actions/bootstrap
      with:
        python: false
    - name: Login to ghcr.io
      run: make ci-oras-ghcr-login
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_USERNAME: ${{ github.actor }}
    - name: Download the existing provider state
      env:
        PROVIDER: ${{ matrix.provider }}
      # best effort: a failed download must not fail the job (hence `|| true`)
      run: make download-provider-cache provider="$PROVIDER" date=latest || true
    - name: Update the provider
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PROVIDER: ${{ matrix.provider }}
      run: make refresh-provider-cache provider="$PROVIDER"
    # notify Slack only when an earlier step failed and notifications are enabled
    - uses: 8398a7/action-slack@77eaa4f1c608a7d68b38af4e3f739dcd8cba273e #v3.19.0
      with:
        status: ${{ job.status }}
        fields: workflow,eventName
        text: Daily Data Sync for ${{ matrix.provider }} failed
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_TOOLBOX_WEBHOOK_URL }}
      if: ${{ failure() && env.SLACK_NOTIFICATIONS == 'true' }}
    - name: Upload the provider workspace state
      # even if the job fails, we want to upload yesterdays cache as todays cache to continue the DB build
      if: ${{ always() }}
      env:
        PROVIDER: ${{ matrix.provider }}
      run: make upload-provider-cache provider="$PROVIDER"
# Refreshes the cache of each provider outside the multicore group (one matrix
# entry per provider), then uploads the resulting state.
update-provider:
  name: "Update provider"
  needs: discover-providers
  # A matrix with no values is rejected by GitHub as a job error; skip the job
  # instead when discovery produced an empty list (jq -c prints "[]").
  if: ${{ needs.discover-providers.outputs.other-providers != '[]' }}
  runs-on: runs-on=${{ github.run_id }}-provider-${{ strategy.job-index }}/runner=large
  timeout-minutes: 480
  # set the permissions granted to the github token to publish to ghcr.io
  permissions:
    contents: read
    packages: write
  strategy:
    matrix:
      provider: ${{fromJson(needs.discover-providers.outputs.other-providers)}}
    # one provider failing must not cancel the others
    fail-fast: false
  steps:
    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 #v6.0.1
      with:
        persist-credentials: false
    - name: Bootstrap environment
      uses: ./.github/actions/bootstrap
      with:
        python: false
    - name: Login to ghcr.io
      run: make ci-oras-ghcr-login
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_USERNAME: ${{ github.actor }}
    - name: Download the existing provider state
      env:
        PROVIDER: ${{ matrix.provider }}
      # best effort: a failed download must not fail the job (hence `|| true`)
      run: make download-provider-cache provider="$PROVIDER" date=latest || true
    - name: Update the provider
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        PROVIDER: ${{ matrix.provider }}
      run: make refresh-provider-cache provider="$PROVIDER"
    # notify Slack only when an earlier step failed and notifications are enabled
    - uses: 8398a7/action-slack@77eaa4f1c608a7d68b38af4e3f739dcd8cba273e #v3.19.0
      with:
        status: ${{ job.status }}
        fields: workflow,eventName
        text: Daily Data Sync for ${{ matrix.provider }} failed
      env:
        SLACK_WEBHOOK_URL: ${{ secrets.SLACK_TOOLBOX_WEBHOOK_URL }}
      if: ${{ failure() && env.SLACK_NOTIFICATIONS == 'true' }}
    - name: Upload the provider workspace state
      # even if the job fails, we want to upload yesterdays cache as todays cache to continue the DB build
      if: ${{ always() }}
      env:
        PROVIDER: ${{ matrix.provider }}
      run: make upload-provider-cache provider="$PROVIDER"
# Combines the per-provider caches into a single cache image and uploads it,
# once both provider update jobs have finished.
aggregate-cache:
  name: "Aggregate provider cache"
  runs-on: runs-on=${{ github.run_id }}/runner=large
  # Run even when a provider update job failed or was skipped, but only if
  # provider discovery succeeded: otherwise PROVIDERS_USED below is empty, and
  # a bare always() would also bypass the repository guard on discover-providers.
  if: ${{ always() && needs.discover-providers.result == 'success' }}
  needs:
    - update-provider
    - update-provider-multicore
    - discover-providers
  # set the permissions granted to the github token: packages is write because
  # this job uploads the aggregated cache image in addition to reading the
  # per-provider caches (NOTE(review): presumably both live on ghcr.io - confirm)
  permissions:
    packages: write
    contents: read
  steps:
    - uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 #v6.0.1
      with:
        persist-credentials: false
    - name: Bootstrap environment
      uses: ./.github/actions/bootstrap
      with:
        python: false
    - name: Login to ghcr.io
      run: make ci-oras-ghcr-login
      env:
        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        GITHUB_USERNAME: ${{ github.actor }}
    - name: Aggregate vulnerability data
      run: make aggregate-all-provider-cache
      env:
        # the full provider list published by the discover-providers job
        PROVIDERS_USED: ${{ needs.discover-providers.outputs.providers }}
    - name: Upload vulnerability data cache image
      run: make upload-all-provider-cache