Merge pull request #339 from Cloud-Code-AI/338-add-functionality-to-ask-question-to-pr

feat: Ask a question functionality
Showing 4 changed files with 325 additions and 1 deletion.
flake8 configuration
@@ -1,3 +1,3 @@
 [flake8]
-exclude = docs/*
+exclude = docs/*, venv/*
 ignore = E501, W503, E203, F541, W293, W291, E266, F601
Ask-question example script (new file)
@@ -0,0 +1,54 @@
from kaizen.reviewer.ask_question import QuestionAnswer
from kaizen.llms.provider import LLMProvider
from github_app.github_helper.utils import get_diff_text, get_pr_files
import json
import logging

logging.basicConfig(level="DEBUG")

# PR details
pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch"
pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files"
pr_title = "feat: updated the prompt to provide solution"

# Fetch PR data
diff_text = get_diff_text(pr_diff, "")
pr_files = get_pr_files(pr_files, "")

# Initialize QuestionAnswer
qa = QuestionAnswer(llm_provider=LLMProvider())

# Example questions
questions = [
    "What are the main changes in this pull request?",
    "Are there any potential performance implications in these changes?",
    "Does this PR introduce any new dependencies?",
]

# Ask questions about the PR
for question in questions:
    print(f"\n----- Question: {question} -----")

    answer_output = qa.ask_pull_request(
        diff_text=diff_text,
        pull_request_title=pr_title,
        pull_request_desc="",
        question=question,
        pull_request_files=pr_files,
        user="kaizen/example",
    )

    print(f"Answer: {answer_output.answer}")
    print(f"Model: {answer_output.model_name}")
    print(f"Usage: {json.dumps(answer_output.usage, indent=2)}")
    print(f"Cost: {json.dumps(answer_output.cost, indent=2)}")

# Check if a specific question's prompt is within the token limit
sample_question = "What are the coding style changes in this PR?"
is_within_limit = qa.is_ask_question_prompt_within_limit(
    diff_text=diff_text,
    pull_request_title=pr_title,
    pull_request_desc="",
    question=sample_question,
)
print(f"\nIs the prompt for '{sample_question}' within token limit? {is_within_limit}")
kaizen/llms/prompts/ask_question_prompts.py (new file)
@@ -0,0 +1,62 @@
ANSWER_QUESTION_SYSTEM_PROMPT = """
You are an AI assistant specializing in software development and code review. Your role is to answer questions about pull requests accurately and comprehensively. When responding to questions:
1. Analyze the provided code changes, pull request title, and description thoroughly.
2. Provide clear, concise, and relevant answers based on the information given.
3. If applicable, refer to specific code snippets or changes to support your answers.
4. Consider various aspects such as code quality, performance implications, potential bugs, and adherence to best practices.
5. Offer insights into the overall impact of the changes on the codebase.
6. If a question cannot be fully answered with the given information, state this clearly and provide the best possible answer based on available data.
7. Maintain a neutral, professional tone in your responses.
8. Do not ask for additional information or clarification; work with what is provided.
9. If relevant, suggest improvements or alternatives, but always in the context of answering the specific question asked.

Your goal is to provide valuable insights that help developers and reviewers better understand the pull request and its implications.
"""

ANSWER_QUESTION_PROMPT = """
As an experienced software engineer, answer the following question about the given pull request. Use the provided information to give an accurate and helpful response.

INFORMATION:
Pull Request Title: {PULL_REQUEST_TITLE}
Pull Request Description: {PULL_REQUEST_DESC}

PATCH DATA:
```{CODE_DIFF}```

QUESTION:
{QUESTION}

Please provide a concise and informative answer to the question, based on the pull request information and code changes.
"""

FILE_ANSWER_QUESTION_PROMPT = """
As an experienced software engineer, answer the following question about the given pull request. Use the provided information to give an accurate and helpful response.

INFORMATION:
Pull Request Title: {PULL_REQUEST_TITLE}
Pull Request Description: {PULL_REQUEST_DESC}

FILE PATCH:
```{FILE_PATCH}```

QUESTION:
{QUESTION}

Please provide a concise and informative answer to the question, based on the pull request information and code changes.
"""

SUMMARIZE_ANSWER_PROMPT = """
As an experienced software engineer, analyze and summarize the following responses related to a question about a pull request.
Each response corresponds to a different file or chunk of the pull request.

QUESTION:
{QUESTION}

Responses:
{RESPONSES}

Please provide a concise and informative summary that addresses the original question based on all the given responses.
"""
kaizen/reviewer/ask_question.py (new file)
@@ -0,0 +1,208 @@
from typing import Optional, List, Dict, Generator
from dataclasses import dataclass
import logging
from kaizen.helpers import parser
from kaizen.llms.provider import LLMProvider
from kaizen.llms.prompts.ask_question_prompts import (
    ANSWER_QUESTION_SYSTEM_PROMPT,
    ANSWER_QUESTION_PROMPT,
    FILE_ANSWER_QUESTION_PROMPT,
    SUMMARIZE_ANSWER_PROMPT,
)


@dataclass
class AnswerOutput:
    answer: str
    usage: Dict[str, int]
    model_name: str
    cost: Dict[str, float]


class QuestionAnswer:
    def __init__(self, llm_provider: LLMProvider):
        self.logger = logging.getLogger(__name__)
        self.provider = llm_provider
        self.provider.system_prompt = ANSWER_QUESTION_SYSTEM_PROMPT
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }

    def is_ask_question_prompt_within_limit(
        self,
        diff_text: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
    ) -> bool:
        prompt = ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            CODE_DIFF=parser.patch_to_combined_chunks(diff_text),
            QUESTION=question,
        )
        return self.provider.is_inside_token_limit(PROMPT=prompt)

    def ask_pull_request(
        self,
        diff_text: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        pull_request_files: List[Dict],
        user: Optional[str] = None,
    ) -> AnswerOutput:
        prompt = ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            CODE_DIFF=parser.patch_to_combined_chunks(diff_text),
            QUESTION=question,
        )
        # Reset usage accounting for this request.
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }
        if not diff_text and not pull_request_files:
            raise Exception("Both diff_text and pull_request_files are empty!")

        # Answer from the full diff when it fits the model's context;
        # otherwise fall back to per-file processing.
        if diff_text and self.provider.is_inside_token_limit(PROMPT=prompt):
            resp = self._process_full_diff_qa(prompt, user)
        else:
            resp = self._process_files_qa(
                pull_request_files,
                pull_request_title,
                pull_request_desc,
                question,
                user,
            )

        prompt_cost, completion_cost = self.provider.get_usage_cost(
            total_usage=self.total_usage
        )

        return AnswerOutput(
            answer=resp,
            usage=self.total_usage,
            model_name=self.provider.model,
            cost={"prompt_cost": prompt_cost, "completion_cost": completion_cost},
        )

    def _process_full_diff_qa(
        self,
        prompt: str,
        user: Optional[str],
    ) -> str:
        self.logger.debug("Processing directly from diff")
        resp, usage = self.provider.chat_completion(prompt, user=user)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)
        return resp

    def _process_files_qa(
        self,
        pull_request_files: List[Dict],
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> str:
        self.logger.debug("Processing based on files")
        responses = []
        for answer in self._process_files_generator_qa(
            pull_request_files,
            pull_request_title,
            pull_request_desc,
            question,
            user,
        ):
            responses.append(answer)
        # Summarize the per-file answers into a single response.
        return self._summarize_responses(question, responses)

    def _process_files_generator_qa(
        self,
        pull_request_files: List[Dict],
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> Generator[str, None, None]:
        # Pack as many file patches as fit into one prompt, yielding an
        # answer for each packed chunk.
        combined_diff_data = ""
        available_tokens = self.provider.available_tokens(FILE_ANSWER_QUESTION_PROMPT)

        for file in pull_request_files:
            patch_details = file.get("patch")
            filename = file.get("filename", "")

            if (
                filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES
                and patch_details is not None
            ):
                temp_prompt = (
                    combined_diff_data
                    + f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details)}"
                )

                if available_tokens - self.provider.get_token_count(temp_prompt) > 0:
                    combined_diff_data = temp_prompt
                    continue

                # Current chunk is full: answer for it, then start a new
                # chunk with this file's patch.
                yield self._process_file_chunk_qa(
                    combined_diff_data,
                    pull_request_title,
                    pull_request_desc,
                    question,
                    user,
                )
                combined_diff_data = (
                    f"\n---->\nFile Name: {filename}\nPatch Details: {patch_details}"
                )

        if combined_diff_data:
            yield self._process_file_chunk_qa(
                combined_diff_data,
                pull_request_title,
                pull_request_desc,
                question,
                user,
            )

    def _process_file_chunk_qa(
        self,
        diff_data: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> str:
        if not diff_data:
            return ""
        prompt = FILE_ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            FILE_PATCH=diff_data,
            QUESTION=question,
        )
        resp, usage = self.provider.chat_completion(prompt, user=user)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)
        return resp

    def _summarize_responses(self, question: str, responses: List[str]) -> str:
        # A single response needs no summarization pass.
        if len(responses) == 1:
            return responses[0]

        formatted_responses = "\n\n".join(
            f"Response for file/chunk {i + 1}:\n{response}"
            for i, response in enumerate(responses)
        )
        summary_prompt = SUMMARIZE_ANSWER_PROMPT.format(
            QUESTION=question, RESPONSES=formatted_responses
        )

        summarized_answer, usage = self.provider.chat_completion(summary_prompt)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)

        return summarized_answer
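The per-file fallback reads only the `filename` and `patch` keys from each entry of `pull_request_files`, matching the shape of GitHub's pull-request files API response. A hand-built payload for illustration (the paths and patch text are made up; the skip rules mirror the filter in `_process_files_generator_qa`):

```python
# Illustrative pull_request_files payload; _process_files_generator_qa reads
# only "filename" and "patch" from each entry.
pull_request_files = [
    {
        "filename": "kaizen/reviewer/ask_question.py",
        "patch": "@@ -0,0 +1,208 @@\n+from typing import Optional, List, Dict, Generator\n+...",
    },
    {
        # Entries whose extension is in parser.EXCLUDED_FILETYPES, or whose
        # "patch" is None (as GitHub returns for binary files), are skipped.
        "filename": "assets/diagram.png",
        "patch": None,
    },
]
```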