# Evaluate LLM Workflow and Post Comment #17

# Manually triggered workflow. It runs the evaluation test cases in ./backend,
# aggregates the scores into scores.md and uploads them as an artifact. On refs
# other than main it also downloads the scores.md artifact of the main branch,
# generates a comparison report, and posts it on the associated pull request.
name: Evaluate LLM Workflow and Post Comment

permissions:
  contents: read
  pull-requests: write
  issues: write
  # With an explicit permissions block every unlisted scope is "none"; the
  # cross-run artifact download below reads workflow runs and artifacts.
  actions: read

on: workflow_dispatch

jobs:
  evaluation-pipeline:
    runs-on: ubuntu-latest
    defaults:
      run:
        # Applies to `run:` steps only. Paths given to `uses:` steps are
        # resolved from the repository root, hence the ./backend/ prefixes below.
        working-directory: ./backend
    steps:
      - uses: actions/checkout@v4

      - name: Install Python
        uses: actions/setup-python@v4
        with:
          # Quoted so the version is always read as a string.
          python-version: "3.11.6"

      - name: Install poetry
        uses: abatilo/actions-poetry@v2

      - name: Setup a local virtual environment (if no poetry.toml file)
        run: |
          poetry config virtualenvs.create true --local
          poetry config virtualenvs.in-project true --local

      - name: Define a cache for the virtual environment based on the dependencies lock file
        uses: actions/cache@v3
        with:
          # The in-project virtualenv is created by the poetry commands, which
          # run in ./backend, so the cache must point there rather than at the
          # repository root.
          # NOTE(review): assumes poetry.lock lives in backend/ — confirm.
          path: ./backend/.venv
          key: venv-${{ hashFiles('backend/poetry.lock') }}

      - name: Install the project dependencies
        run: poetry install --with dev --no-interaction --no-root

      - name: Run eval pipeline test cases
        env:
          DATA_DIRECTORY: ./
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
        run: PYTHONPATH=. poetry run pytest tests/evaluation/test_query_flow.py --run-expensive

      - name: Run score aggregation script
        run: poetry run python tests/evaluation/aggregate_scores.py

      # NOTE(review): actions/upload-artifact@v3 has been retired on github.com.
      # Upgrading to v4 should be done together with a check that the
      # download-artifact action version used below can read v4 artifacts.
      - name: Save scores as artifact
        uses: actions/upload-artifact@v3
        with:
          name: scores.md
          # The compare step reads scores.md from ./backend, so upload it from there.
          path: ./backend/scores.md

      - name: Find associated pull request
        uses: jwalton/gh-find-current-pr@v1
        id: findPr
        # Skipped on main; on any other ref a failed lookup must not fail the job.
        if: github.ref != 'refs/heads/main'
        continue-on-error: true
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Download main branch scores
        if: github.ref != 'refs/heads/main'
        uses: dawidd6/action-download-artifact@v2
        with:
          workflow: evaluation_pipeline.yml
          branch: main
          name: scores.md
          path: ./backend/main-scores

      - name: Compare scores and generate report
        if: github.ref != 'refs/heads/main'
        run: |
          poetry run python tests/evaluation/compare_scores.py \
            scores.md \
            main-scores/scores.md \
            comparison.md

      - name: Post comparison on PR
        # Only post when a pull request number was actually found.
        if: success() && steps.findPr.outputs.number
        uses: marocchino/sticky-pull-request-comment@v2
        with:
          number: ${{ steps.findPr.outputs.number }}
          # comparison.md is written by the compare step inside ./backend.
          path: ./backend/comparison.md

      - name: Upload comparison as artifact
        if: github.ref != 'refs/heads/main'
        uses: actions/upload-artifact@v3
        with:
          name: score-comparison
          path: ./backend/comparison.md

      - name: Upload test results as artifact
        uses: actions/upload-artifact@v3
        with:
          name: test-results.csv
          path: ./backend/test_results.csv