
Commit 32e9cc0

Jason Xie (Arcadia822) authored and committed
yeah
1 parent 433781c commit 32e9cc0

19 files changed: +7208 -5 lines

README.md

+39
@@ -13,6 +13,12 @@ Codedog leverages Large Language Models (LLMs) like GPT to automatically review
 * **Code Review Suggestions**: Provides feedback and suggestions on code changes (experimental).
 * **Multi-language Support**: Includes templates for English and Chinese reports.
 * **Platform Support**: Works with GitHub and GitLab.
+* **Automated Code Review**: Uses LLMs to analyze code changes, provide feedback, and suggest improvements.
+* **Scoring System**: Evaluates code across multiple dimensions, including correctness, readability, and maintainability.
+* **Multiple LLM Support**: Works with OpenAI, Azure OpenAI, DeepSeek, and MindConnect R1 models.
+* **Email Notifications**: Sends code review reports via email.
+* **Commit-Triggered Reviews**: Automatically reviews code when commits are made.
+* **Developer Evaluation**: Evaluates a developer's code over a specific time period.

 ## Prerequisites

@@ -68,6 +74,13 @@ Codedog uses environment variables for configuration. You can set these directly
 * `AZURE_OPENAI_DEPLOYMENT_ID="your_gpt_35_turbo_deployment_name"` (Used for code summaries/reviews)
 * `AZURE_OPENAI_GPT4_DEPLOYMENT_ID="your_gpt_4_deployment_name"` (Used for PR summary)
 * *(Optional)* `AZURE_OPENAI_API_VERSION="YYYY-MM-DD"` (Defaults to a recent preview version if not set)
+* **DeepSeek Models**: Set the following for DeepSeek models:
+    * `DEEPSEEK_API_KEY="your_deepseek_api_key"`
+    * *(Optional)* `DEEPSEEK_MODEL="deepseek-chat"` (Default model; options include "deepseek-chat", "deepseek-coder", etc.)
+    * *(Optional)* `DEEPSEEK_API_BASE="https://api.deepseek.com"` (Default API endpoint)
+    * For the **DeepSeek R1 model** specifically:
+        * Set `DEEPSEEK_MODEL="deepseek-r1"`
+        * *(Optional)* `DEEPSEEK_R1_API_BASE="https://your-r1-endpoint"` (If different from the standard DeepSeek endpoint)

 **Example `.env` file:**

@@ -84,6 +97,32 @@ OPENAI_API_KEY="sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
 # AZURE_OPENAI_API_BASE="https://your-instance.openai.azure.com/"
 # AZURE_OPENAI_DEPLOYMENT_ID="gpt-35-turbo-16k"
 # AZURE_OPENAI_GPT4_DEPLOYMENT_ID="gpt-4-turbo"
+
+# LLM (DeepSeek example)
+# DEEPSEEK_API_KEY="your_deepseek_api_key"
+# DEEPSEEK_MODEL="deepseek-chat"
+# DEEPSEEK_API_BASE="https://api.deepseek.com"
+
+# LLM (DeepSeek R1 example)
+# DEEPSEEK_API_KEY="your_deepseek_api_key"
+# DEEPSEEK_MODEL="deepseek-r1"
+# DEEPSEEK_R1_API_BASE="https://your-r1-endpoint"
+
+# LLM (MindConnect R1 example)
+# MINDCONNECT_API_KEY="your_mindconnect_api_key"
+
+# Model selection (optional)
+CODE_SUMMARY_MODEL="gpt-3.5"
+PR_SUMMARY_MODEL="gpt-4"
+CODE_REVIEW_MODEL="deepseek" # Can use "deepseek" or "deepseek-r1" here
+
+# Email notification (optional)
+EMAIL_ENABLED="true"
+NOTIFICATION_EMAILS="your_email@example.com,another_email@example.com"
+SMTP_SERVER="smtp.gmail.com"
+SMTP_PORT="587"
+SMTP_USERNAME="your_email@gmail.com"
+SMTP_PASSWORD="your_app_password"
 ```

 ## Running the Example (Quickstart)
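
For orientation, here is a minimal sketch of how the DeepSeek settings above could be exercised. It is not codedog's internal wiring: only the environment variable names come from the `.env` example; the client call relies on DeepSeek's OpenAI-compatible API and the `openai` Python package.

```python
# Hypothetical standalone check of the DeepSeek configuration above.
# Assumes DeepSeek's OpenAI-compatible endpoint and the `openai` Python package.
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.environ["DEEPSEEK_API_KEY"],
    base_url=os.getenv("DEEPSEEK_API_BASE", "https://api.deepseek.com"),
)

response = client.chat.completions.create(
    model=os.getenv("DEEPSEEK_MODEL", "deepseek-chat"),
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
)
print(response.choices[0].message.content)
```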
+161 -4
@@ -1,3 +1,7 @@
+import json
+import re
+from typing import Dict, List, Tuple, Any
+
 from codedog.actors.reporters.base import Reporter
 from codedog.localization import Localization
 from codedog.models.code_review import CodeReview
@@ -7,6 +11,7 @@ class CodeReviewMarkdownReporter(Reporter, Localization):
     def __init__(self, code_reviews: list[CodeReview], language="en"):
         self._code_reviews: list[CodeReview] = code_reviews
         self._markdown: str = ""
+        self._scores: List[Dict] = []

         super().__init__(language=language)

@@ -16,17 +21,169 @@ def report(self) -> str:

         return self._markdown

+    def _extract_scores(self, review_text: str, file_name: str) -> Dict[str, Any]:
+        """Extract scores from the review text using a simple format."""
+        # Default empty score data
+        default_scores = {
+            "file": file_name,
+            "scores": {
+                "correctness": 0,
+                "readability": 0,
+                "maintainability": 0,
+                "standards_compliance": 0,
+                "performance": 0,
+                "security": 0,
+                "overall": 0
+            }
+        }
+
+        try:
+            # Look for the scores section
+            scores_section = re.search(r'#{1,3}\s*SCORES:\s*([\s\S]*?)(?=#{1,3}|$)', review_text)
+            if not scores_section:
+                print(f"No scores section found for {file_name}")
+                return default_scores
+
+            scores_text = scores_section.group(1)
+
+            # Extract individual scores
+            correctness = self._extract_score(scores_text, "Correctness")
+            readability = self._extract_score(scores_text, "Readability")
+            maintainability = self._extract_score(scores_text, "Maintainability")
+            standards = self._extract_score(scores_text, "Standards Compliance")
+            performance = self._extract_score(scores_text, "Performance")
+            security = self._extract_score(scores_text, "Security")
+            overall = self._extract_score(scores_text, "Overall")
+
+            # Update scores if found
+            if any([correctness, readability, maintainability, standards, performance, security, overall]):
+                return {
+                    "file": file_name,
+                    "scores": {
+                        "correctness": correctness or 0,
+                        "readability": readability or 0,
+                        "maintainability": maintainability or 0,
+                        "standards_compliance": standards or 0,
+                        "performance": performance or 0,
+                        "security": security or 0,
+                        "overall": overall or 0
+                    }
+                }
+
+        except Exception as e:
+            print(f"Error extracting scores from review for {file_name}: {e}")
+
+        return default_scores
+
+    def _extract_score(self, text: str, dimension: str) -> float:
+        """Extract a score for a specific dimension from text."""
+        try:
+            # Find patterns like "Correctness: 4.5 /5" or "- Readability: 3.8/5"
+            pattern = rf'[-\s]*{dimension}:\s*(\d+(?:\.\d+)?)\s*\/?5'
+            match = re.search(pattern, text, re.IGNORECASE)
+            if match:
+                return float(match.group(1))
+        except Exception as e:
+            print(f"Error extracting {dimension} score: {e}")
+        return 0
+
+    def _calculate_average_scores(self) -> Dict:
+        """Calculate the average scores across all files."""
+        if not self._scores:
+            return {
+                "avg_correctness": 0,
+                "avg_readability": 0,
+                "avg_maintainability": 0,
+                "avg_standards": 0,
+                "avg_performance": 0,
+                "avg_security": 0,
+                "avg_overall": 0
+            }
+
+        total_files = len(self._scores)
+        avg_scores = {
+            "avg_correctness": sum(s["scores"]["correctness"] for s in self._scores) / total_files,
+            "avg_readability": sum(s["scores"]["readability"] for s in self._scores) / total_files,
+            "avg_maintainability": sum(s["scores"]["maintainability"] for s in self._scores) / total_files,
+            "avg_standards": sum(s["scores"]["standards_compliance"] for s in self._scores) / total_files,
+            "avg_performance": sum(s["scores"]["performance"] for s in self._scores) / total_files,
+            "avg_security": sum(s["scores"]["security"] for s in self._scores) / total_files,
+            "avg_overall": sum(s["scores"]["overall"] for s in self._scores) / total_files
+        }
+
+        return avg_scores
+
+    def _get_quality_assessment(self, avg_overall: float) -> str:
+        """Generate a quality assessment based on the average overall score."""
+        if avg_overall >= 4.5:
+            return "Excellent code quality. The PR demonstrates outstanding adherence to best practices and coding standards."
+        elif avg_overall >= 4.0:
+            return "Very good code quality. The PR shows strong adherence to standards with only minor improvement opportunities."
+        elif avg_overall >= 3.5:
+            return "Good code quality. The PR meets most standards but has some areas for improvement."
+        elif avg_overall >= 3.0:
+            return "Satisfactory code quality. The PR is acceptable but has several areas that could be improved."
+        elif avg_overall >= 2.0:
+            return "Needs improvement. The PR has significant issues that should be addressed before merging."
+        else:
+            return "Poor code quality. The PR has major issues that must be fixed before it can be accepted."
+
+    def _generate_summary_table(self) -> str:
+        """Generate a summary table of all file scores."""
+        if not self._scores:
+            return ""
+
+        file_score_rows = []
+        for score in self._scores:
+            file_name = score["file"]
+            s = score["scores"]
+            file_score_rows.append(
+                f"| {file_name} | {s['correctness']:.2f} | {s['readability']:.2f} | {s['maintainability']:.2f} | "
+                f"{s['standards_compliance']:.2f} | {s['performance']:.2f} | {s['security']:.2f} | {s['overall']:.2f} |"
+            )
+
+        avg_scores = self._calculate_average_scores()
+        quality_assessment = self._get_quality_assessment(avg_scores["avg_overall"])
+
+        return self.template.PR_REVIEW_SUMMARY_TABLE.format(
+            file_scores="\n".join(file_score_rows),
+            avg_correctness=avg_scores["avg_correctness"],
+            avg_readability=avg_scores["avg_readability"],
+            avg_maintainability=avg_scores["avg_maintainability"],
+            avg_standards=avg_scores["avg_standards"],
+            avg_performance=avg_scores["avg_performance"],
+            avg_security=avg_scores["avg_security"],
+            avg_overall=avg_scores["avg_overall"],
+            quality_assessment=quality_assessment
+        )
+
     def _generate_report(self):
         code_review_segs = []
+
         for code_review in self._code_reviews:
+            # Extract scores if the review is not empty
+            if hasattr(code_review, 'review') and code_review.review.strip():
+                file_name = code_review.file.full_name if hasattr(code_review, 'file') and hasattr(code_review.file, 'full_name') else "Unknown"
+                score_data = self._extract_scores(code_review.review, file_name)
+                self._scores.append(score_data)
+
+            # Add the review text (without modification)
             code_review_segs.append(
                 self.template.REPORT_CODE_REVIEW_SEGMENT.format(
-                    full_name=code_review.file.full_name,
-                    url=code_review.file.diff_url,
-                    review=code_review.review,
+                    full_name=code_review.file.full_name if hasattr(code_review, 'file') and hasattr(code_review.file, 'full_name') else "Unknown",
+                    url=code_review.file.diff_url if hasattr(code_review, 'file') and hasattr(code_review.file, 'diff_url') else "#",
+                    review=code_review.review if hasattr(code_review, 'review') else "",
                 )
             )

-        return self.template.REPORT_CODE_REVIEW.format(
+        # Generate review content
+        review_content = self.template.REPORT_CODE_REVIEW.format(
             feedback="\n".join(code_review_segs) if code_review_segs else self.template.REPORT_CODE_REVIEW_NO_FEEDBACK,
         )
+
+        # Add summary table at the end if we have scores
+        summary_table = self._generate_summary_table()
+        if summary_table:
+            review_content += "\n\n" + summary_table
+
+        return review_content
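
The score extraction above hinges on two regular expressions: one that isolates the `### SCORES:` block and one that pulls each dimension's number. A small standalone sketch, with an invented review snippet, shows what they match:

```python
# Standalone illustration of the score-extraction regexes used by the reporter.
# The sample review text is invented for demonstration purposes.
import re

sample_review = """### SCORES:
- Correctness: 4.5 /5
- Readability: 3.8/5
"""

section = re.search(r'#{1,3}\s*SCORES:\s*([\s\S]*?)(?=#{1,3}|$)', sample_review)
scores_text = section.group(1)

for dimension in ("Correctness", "Readability"):
    match = re.search(rf'[-\s]*{dimension}:\s*(\d+(?:\.\d+)?)\s*\/?5', scores_text, re.IGNORECASE)
    print(dimension, float(match.group(1)) if match else 0)
# Prints: Correctness 4.5, then Readability 3.8
```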

codedog/templates/grimoire_en.py

+100 -1
@@ -134,7 +134,86 @@
 """

 CODE_SUGGESTION = """Act as a Code Reviewer Assistant. I will give a code diff content.
-And I want you to check whether the code change is correct and give some suggestions to the author.
+And I want you to review the code changes, provide detailed feedback, and score the changes based on language-specific standards and best practices.
+
+## Review Requirements:
+1. Check correctness and logic of the code changes
+2. Evaluate adherence to language-specific coding standards
+3. Identify potential bugs, performance issues, or security vulnerabilities
+4. Provide specific, actionable suggestions for improvement
+5. Score the code in multiple dimensions (see scoring system below)
+
+## Language-Specific Standards:
+{language} code should follow these standards:
+
+### Python:
+- PEP 8 style guide (spacing, naming conventions, line length)
+- Proper docstrings (Google, NumPy, or reST style)
+- Type hints for function parameters and return values
+- Error handling with specific exceptions
+- Avoid circular imports and global variables
+- Follow SOLID principles and avoid anti-patterns
+
+### JavaScript/TypeScript:
+- ESLint/TSLint standards
+- Proper async/await or Promise handling
+- Consistent styling (following project's style guide)
+- Proper error handling
+- Type definitions (for TypeScript)
+- Avoid direct DOM manipulation in frameworks
+
+### Java:
+- Follow Oracle Code Conventions
+- Proper exception handling
+- Appropriate access modifiers
+- Clear Javadoc comments
+- Correct resource management and memory handling
+- Follow SOLID principles
+
+### General (for all languages):
+- DRY (Don't Repeat Yourself) principle
+- Clear naming conventions
+- Appropriate comments for complex logic
+- Proper error handling
+- Security best practices
+
+## Scoring System (1-5 scale, where 5 is excellent):
+- **Correctness** (does the code function as intended?)
+- **Readability** (is the code easy to understand?)
+- **Maintainability** (how easy will this code be to maintain?)
+- **Standards Compliance** (does it follow language/framework conventions?)
+- **Performance** (any obvious performance issues?)
+- **Security** (any security concerns?)
+
+## Overall Score:
+- Calculate a weighted average as follows:
+  - Correctness: 30%
+  - Readability: 20%
+  - Maintainability: 20%
+  - Standards Compliance: 15%
+  - Performance: 10%
+  - Security: 5%
+
+## Format your review as follows:
+1. Brief summary of the changes (1-2 sentences)
+2. Detailed feedback with line references where appropriate
+3. Specific suggestions for improvement
+4. Scoring table with justifications for each dimension
+5. Overall score with brief conclusion
+
+## IMPORTANT: Scores Summary
+At the end of your review, include a clearly formatted score summary section like this:
+
+### SCORES:
+- Correctness: [score] /5
+- Readability: [score] /5
+- Maintainability: [score] /5
+- Standards Compliance: [score] /5
+- Performance: [score] /5
+- Security: [score] /5
+- Overall: [calculated_overall_score] /5
+
+Replace [score] with your actual numeric scores (e.g., 4.5).

 Here's the code diff from file {name}:
 ```{language}
154233
Note that the content might be used in markdown or other formatted text,
155234
so don't change the paragraph layout of the content or add symbols.
156235
Your translation:"""
236+
237+
# Template for the summary score table at the end of PR review
238+
PR_REVIEW_SUMMARY_TABLE = """
239+
## PR Review Summary
240+
241+
| File | Correctness | Readability | Maintainability | Standards | Performance | Security | Overall |
242+
|------|-------------|-------------|----------------|-----------|-------------|----------|---------|
243+
{file_scores}
244+
| **Average** | **{avg_correctness:.2f}** | **{avg_readability:.2f}** | **{avg_maintainability:.2f}** | **{avg_standards:.2f}** | **{avg_performance:.2f}** | **{avg_security:.2f}** | **{avg_overall:.2f}** |
245+
246+
### Score Legend:
247+
- 5.00: Excellent
248+
- 4.00-4.99: Very Good
249+
- 3.00-3.99: Good
250+
- 2.00-2.99: Needs Improvement
251+
- 1.00-1.99: Poor
252+
253+
### PR Quality Assessment:
254+
{quality_assessment}
255+
"""

codedog/templates/template_en.py

+19
@@ -89,6 +89,25 @@

 REPORT_CODE_REVIEW_NO_FEEDBACK = """No suggestions for this PR."""

+# --- Code Review Summary Table -----------------------------------------------
+PR_REVIEW_SUMMARY_TABLE = """
+## PR Review Summary
+
+| File | Correctness | Readability | Maintainability | Standards | Performance | Security | Overall |
+|------|-------------|-------------|-----------------|-----------|-------------|----------|---------|
+{file_scores}
+| **Average** | **{avg_correctness:.2f}** | **{avg_readability:.2f}** | **{avg_maintainability:.2f}** | **{avg_standards:.2f}** | **{avg_performance:.2f}** | **{avg_security:.2f}** | **{avg_overall:.2f}** |
+
+### Score Legend:
+- 5.00: Excellent
+- 4.00-4.99: Very Good
+- 3.00-3.99: Good
+- 2.00-2.99: Needs Improvement
+- 1.00-1.99: Poor
+
+### PR Quality Assessment:
+{quality_assessment}
+"""

 # --- Materials ---------------------------------------------------------------
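
To show what the new summary-table template produces, a small usage sketch follows (made-up scores; it assumes this commit's `codedog.templates.template_en` module is importable):

```python
# Hypothetical rendering of PR_REVIEW_SUMMARY_TABLE with invented numbers.
from codedog.templates import template_en

demo_row = "| demo.py | 4.50 | 4.00 | 3.50 | 4.00 | 3.00 | 5.00 | 4.00 |"
print(template_en.PR_REVIEW_SUMMARY_TABLE.format(
    file_scores=demo_row,
    avg_correctness=4.5,
    avg_readability=4.0,
    avg_maintainability=3.5,
    avg_standards=4.0,
    avg_performance=3.0,
    avg_security=5.0,
    avg_overall=4.0,
    quality_assessment="Very good code quality with only minor improvement opportunities.",
))
```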
