Generate CDSE Documentation Search Index #46
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Crawls the CDSE documentation site and regenerates its search index.
name: Generate CDSE Documentation Search Index

on:
  # Nightly run at 00:00 (GitHub evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0 * * *'
  # Manual run from the Actions tab; both inputs are optional.
  workflow_dispatch:
    inputs:
      start_url:
        description: 'Starting URL for crawling (default: https://documentation.dataspace.copernicus.eu/)'
        default: 'https://documentation.dataspace.copernicus.eu/'
        required: false
      max_minutes:
        description: 'Maximum crawling time in minutes (default: 15)'
        default: '15'
        required: false
# Every run of this workflow joins the same concurrency group, so two runs
# never execute at the same time.
concurrency:
  group: generate-search-index
jobs:
  # Single job: mount the index branch, run the scraper, push any changes back.
  generate-index:
    runs-on: ubuntu-latest
    # The final step pushes to the docportal-search-index branch, so the job's
    # GITHUB_TOKEN needs write access to repository contents. Declaring it here
    # keeps the push working when the repository default token is read-only.
    permissions:
      contents: write
    steps:
      - name: Check out repository
        uses: actions/checkout@v4

      # Identity used for the subtree merge commit and the index commit below.
      - name: Set Git config
        run: |
          git config --local user.email "actions@github.com"
          git config --local user.name "Github Actions"

      # Mount the docportal-search-index branch under search_index/.
      - name: Create git subtree
        run: git subtree add --prefix search_index origin docportal-search-index

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so YAML keeps the version as a string rather than a float.
          python-version: '3.14'
          cache: 'pip'

      - name: Install dependencies
        run: pip install -r .github/scripts/requirements.txt

      # NOTE(review): the scraper presumably writes its output into
      # search_index/ (the only path staged by the Commit step) - confirm
      # against .github/scripts/run.py.
      - name: Generate search index
        env:
          # Scheduled runs carry no inputs, so fall back to the same defaults
          # declared for workflow_dispatch. The values travel through env so
          # that user-supplied text never becomes part of the script itself.
          START_URL: ${{ github.event.inputs.start_url || 'https://documentation.dataspace.copernicus.eu/' }}
          MAX_MINUTES: ${{ github.event.inputs.max_minutes || '15' }}
        run: |
          python .github/scripts/run.py scrape --start-url "$START_URL" --max-minutes "$MAX_MINUTES"

      # Commit and push only when the staged search_index/ content differs.
      - name: Commit
        run: |
          git add search_index
          if git diff --cached --quiet; then
            echo "No changes in search_index to commit"
          else
            git commit -m "search - update of index"
            git subtree push --prefix search_index origin docportal-search-index
          fi