# evaluation_pipeline.yml
name: Evaluate LLM Workflow and Post Comment
permissions:
contents: read
pull-requests: write
issues: write
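# Triggered manually; dispatch it from a PR branch so the comparison steps
# below fire, or from main to refresh the baseline scores artifact.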
on: workflow_dispatch
jobs:
evaluation-pipeline:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ./backend
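    # Note: working-directory applies to `run` steps only; `uses` steps
    # (cache, artifact upload) resolve their paths from the repo root.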
steps:
- uses: actions/checkout@v4
- name: Install Python
uses: actions/setup-python@v4
with:
python-version: 3.11.6
- name: Install poetry
uses: abatilo/actions-poetry@v2
      - name: Set up a local virtual environment (if no poetry.toml file exists)
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
      - uses: actions/cache@v3
        name: Cache the virtual environment, keyed on the dependency lock file
        with:
          path: ./backend/.venv
          key: venv-${{ hashFiles('backend/poetry.lock') }}
- name: Install the project dependencies
run: poetry install --with dev --no-interaction --no-root
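      # --run-expensive is assumed to be a repo-defined pytest flag (typically
      # registered in conftest.py) that opts in to the LLM-calling eval cases.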
- name: Run eval pipeline test cases
env:
DATA_DIRECTORY: ./
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: PYTHONPATH=. poetry run pytest tests/evaluation/test_query_flow.py --run-expensive
- name: Upload test results as artifact
        uses: actions/upload-artifact@v4
with:
name: test-results.csv
path: ./backend/test_results.csv
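      # Roll the per-case results up into scores.md, which doubles as the
      # main-branch baseline for PR comparisons below.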
- name: Run score aggregation script
run: poetry run python tests/evaluation/aggregate_scores.py
- name: Save scores as artifact
        uses: actions/upload-artifact@v4
with:
name: scores.md
path: ./backend/scores.md
- name: Find associated pull request
uses: jwalton/gh-find-current-pr@v1
id: findPr
        # Skip on main; on a PR branch this resolves the open PR so results can be posted there
if: github.ref != 'refs/heads/main'
continue-on-error: true
with:
github-token: ${{ secrets.GITHUB_TOKEN }}
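      # Fetch the scores.md artifact from the latest main-branch run of this
      # workflow to serve as the comparison baseline.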
- name: Download main branch scores
if: github.ref != 'refs/heads/main'
uses: dawidd6/action-download-artifact@v6
with:
workflow: evaluation_pipeline.yml
branch: main
name: scores.md
path: ./backend/main-scores
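      # compare_scores.py is assumed to write its report to comparison.md,
      # which the remaining steps read and publish.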
- name: Compare scores and generate report
if: github.ref != 'refs/heads/main'
run: |
poetry run python tests/evaluation/compare_scores.py \
scores.md \
main-scores/scores.md
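      # GITHUB_OUTPUT only takes single-line values directly, so the multiline
      # report is passed through with the delimiter (heredoc-style) syntax.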
- name: Generate markdown comment
if: github.ref != 'refs/heads/main'
id: comparison
run: |
          comparison=$(cat comparison.md)
echo "comparison<<EOF" >> $GITHUB_OUTPUT
echo "$comparison" >> $GITHUB_OUTPUT
echo "EOF" >> $GITHUB_OUTPUT
- name: Post comparison on PR
if: success() && steps.findPr.outputs.number
uses: marocchino/sticky-pull-request-comment@v2
with:
number: ${{ steps.findPr.outputs.number }}
message: |
${{ steps.comparison.outputs.comparison }}
- name: Upload comparison as artifact
if: github.ref != 'refs/heads/main'
        uses: actions/upload-artifact@v4
with:
name: score-comparison
path: ./backend/comparison.md