Merge pull request #339 from Cloud-Code-AI/338-add-functionality-to-ask-question-to-pr

feat: Ask a question functionality
sauravpanda authored Jul 22, 2024
2 parents 9494d68 + 19dd012 commit 0fb572c
Showing 4 changed files with 325 additions and 1 deletion.
2 changes: 1 addition & 1 deletion .flake8
@@ -1,3 +1,3 @@
 [flake8]
-exclude = docs/*
+exclude = docs/*, venv/*
 ignore = E501, W503, E203, F541, W293, W291, E266, F601
54 changes: 54 additions & 0 deletions examples/ask_question/main.py
@@ -0,0 +1,54 @@
from kaizen.reviewer.ask_question import QuestionAnswer
from kaizen.llms.provider import LLMProvider
from github_app.github_helper.utils import get_diff_text, get_pr_files
import json
import logging

logging.basicConfig(level="DEBUG")

# PR details
pr_diff = "https://github.com/Cloud-Code-AI/kaizen/pull/335.patch"
pr_files = "https://api.github.com/repos/Cloud-Code-AI/kaizen/pulls/335/files"
pr_title = "feat: updated the prompt to provide solution"

# Fetch PR data
diff_text = get_diff_text(pr_diff, "")
pr_files = get_pr_files(pr_files, "")

# Initialize QuestionAnswer
qa = QuestionAnswer(llm_provider=LLMProvider())

# Example questions
questions = [
    "What are the main changes in this pull request?",
    "Are there any potential performance implications in these changes?",
    "Does this PR introduce any new dependencies?",
]

# Ask questions about the PR
for question in questions:
    print(f"\n----- Question: {question} -----")

    answer_output = qa.ask_pull_request(
        diff_text=diff_text,
        pull_request_title=pr_title,
        pull_request_desc="",
        question=question,
        pull_request_files=pr_files,
        user="kaizen/example",
    )

    print(f"Answer: {answer_output.answer}")
    print(f"Model: {answer_output.model_name}")
    print(f"Usage: {json.dumps(answer_output.usage, indent=2)}")
    print(f"Cost: {json.dumps(answer_output.cost, indent=2)}")

# Check if a specific question's prompt is within token limit
sample_question = "What are the coding style changes in this PR?"
is_within_limit = qa.is_ask_question_prompt_within_limit(
    diff_text=diff_text,
    pull_request_title=pr_title,
    pull_request_desc="",
    question=sample_question,
)
print(f"\nIs the prompt for '{sample_question}' within token limit? {is_within_limit}")
62 changes: 62 additions & 0 deletions kaizen/llms/prompts/ask_question_prompts.py
@@ -0,0 +1,62 @@
ANSWER_QUESTION_SYSTEM_PROMPT = """
You are an AI assistant specializing in software development and code review. Your role is to answer questions about pull requests accurately and comprehensively. When responding to questions:
1. Analyze the provided code changes, pull request title, and description thoroughly.
2. Provide clear, concise, and relevant answers based on the information given.
3. If applicable, refer to specific code snippets or changes to support your answers.
4. Consider various aspects such as code quality, performance implications, potential bugs, and adherence to best practices.
5. Offer insights into the overall impact of the changes on the codebase.
6. If a question cannot be fully answered with the given information, state this clearly and provide the best possible answer based on available data.
7. Maintain a neutral, professional tone in your responses.
8. Do not ask for additional information or clarification; work with what is provided.
9. If relevant, suggest improvements or alternatives, but always in the context of answering the specific question asked.
Your goal is to provide valuable insights that help developers and reviewers better understand the pull request and its implications.
"""

ANSWER_QUESTION_PROMPT = """
As an experienced software engineer, answer the following question about the given pull request. Use the provided information to give an accurate and helpful response.
INFORMATION:
Pull Request Title: {PULL_REQUEST_TITLE}
Pull Request Description: {PULL_REQUEST_DESC}
PATCH DATA:
```{CODE_DIFF}```
QUESTION:
{QUESTION}
Please provide a concise and informative answer to the question, based on the pull request information and code changes.
"""

FILE_ANSWER_QUESTION_PROMPT = """
As an experienced software engineer, answer the following question about the given pull request. Use the provided information to give an accurate and helpful response.
INFORMATION:
Pull Request Title: {PULL_REQUEST_TITLE}
Pull Request Description: {PULL_REQUEST_DESC}
FILE PATCH:
```{FILE_PATCH}```
QUESTION:
{QUESTION}
Please provide a concise and informative answer to the question, based on the pull request information and code changes.
"""

SUMMARIZE_ANSWER_PROMPT = """
As an experienced software engineer, analyze and summarize the following responses related to a question about a pull request.
Each response corresponds to a different file or chunk of the pull request.
QUESTION:
{QUESTION}
Responses:
{RESPONSES}
Please provide a concise and informative summary that addresses the original question based on all the given responses.
"""
208 changes: 208 additions & 0 deletions kaizen/reviewer/ask_question.py
@@ -0,0 +1,208 @@
from typing import Optional, List, Dict, Generator
from dataclasses import dataclass
import logging
from kaizen.helpers import parser
from kaizen.llms.provider import LLMProvider
from kaizen.llms.prompts.ask_question_prompts import (
    ANSWER_QUESTION_SYSTEM_PROMPT,
    ANSWER_QUESTION_PROMPT,
    FILE_ANSWER_QUESTION_PROMPT,
    SUMMARIZE_ANSWER_PROMPT,
)


@dataclass
class AnswerOutput:
    answer: str
    usage: Dict[str, int]
    model_name: str
    cost: Dict[str, float]


class QuestionAnswer:
    def __init__(self, llm_provider: LLMProvider):
        self.logger = logging.getLogger(__name__)
        self.provider = llm_provider
        self.provider.system_prompt = ANSWER_QUESTION_SYSTEM_PROMPT
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }

    def is_ask_question_prompt_within_limit(
        self,
        diff_text: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
    ) -> bool:
        prompt = ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            CODE_DIFF=parser.patch_to_combined_chunks(diff_text),
            QUESTION=question,
        )
        return self.provider.is_inside_token_limit(PROMPT=prompt)

    def ask_pull_request(
        self,
        diff_text: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        pull_request_files: List[Dict],
        user: Optional[str] = None,
    ) -> AnswerOutput:
        prompt = ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            CODE_DIFF=parser.patch_to_combined_chunks(diff_text),
            QUESTION=question,
        )
        self.total_usage = {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0,
        }
        if not diff_text and not pull_request_files:
            raise ValueError("Both diff_text and pull_request_files are empty!")

        # Answer from the full diff when it fits the token limit;
        # otherwise fall back to per-file chunking.
        if diff_text and self.provider.is_inside_token_limit(PROMPT=prompt):
            resp = self._process_full_diff_qa(prompt, user)
        else:
            resp = self._process_files_qa(
                pull_request_files,
                pull_request_title,
                pull_request_desc,
                question,
                user,
            )

        prompt_cost, completion_cost = self.provider.get_usage_cost(
            total_usage=self.total_usage
        )

        return AnswerOutput(
            answer=resp,
            usage=self.total_usage,
            model_name=self.provider.model,
            cost={"prompt_cost": prompt_cost, "completion_cost": completion_cost},
        )

    def _process_full_diff_qa(
        self,
        prompt: str,
        user: Optional[str],
    ) -> str:
        self.logger.debug("Processing directly from diff")
        resp, usage = self.provider.chat_completion(prompt, user=user)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)
        return resp

    def _process_files_qa(
        self,
        pull_request_files: List[Dict],
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> str:
        self.logger.debug("Processing based on files")
        responses = []
        for answer in self._process_files_generator_qa(
            pull_request_files,
            pull_request_title,
            pull_request_desc,
            question,
            user,
        ):
            responses.append(answer)
        # Summarize the per-chunk answers into a single response
        return self._summarize_responses(question, responses)

    def _process_files_generator_qa(
        self,
        pull_request_files: List[Dict],
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> Generator[str, None, None]:
        combined_diff_data = ""
        available_tokens = self.provider.available_tokens(FILE_ANSWER_QUESTION_PROMPT)

        for file in pull_request_files:
            patch_details = file.get("patch")
            filename = file.get("filename", "")

            if (
                filename.split(".")[-1] not in parser.EXCLUDED_FILETYPES
                and patch_details is not None
            ):
                temp_prompt = (
                    combined_diff_data
                    + f"\n---->\nFile Name: {filename}\nPatch Details: {parser.patch_to_combined_chunks(patch_details)}"
                )

                # Keep accumulating file patches while they fit the token budget
                if available_tokens - self.provider.get_token_count(temp_prompt) > 0:
                    combined_diff_data = temp_prompt
                    continue

                # Budget exceeded: answer for the accumulated chunk, then start a new one
                yield self._process_file_chunk_qa(
                    combined_diff_data,
                    pull_request_title,
                    pull_request_desc,
                    question,
                    user,
                )
                combined_diff_data = (
                    f"\n---->\nFile Name: {filename}\nPatch Details: {patch_details}"
                )

        # Flush whatever is left after the last file
        if combined_diff_data:
            yield self._process_file_chunk_qa(
                combined_diff_data,
                pull_request_title,
                pull_request_desc,
                question,
                user,
            )

    def _process_file_chunk_qa(
        self,
        diff_data: str,
        pull_request_title: str,
        pull_request_desc: str,
        question: str,
        user: Optional[str],
    ) -> str:
        if not diff_data:
            return ""
        prompt = FILE_ANSWER_QUESTION_PROMPT.format(
            PULL_REQUEST_TITLE=pull_request_title,
            PULL_REQUEST_DESC=pull_request_desc,
            FILE_PATCH=diff_data,
            QUESTION=question,
        )
        resp, usage = self.provider.chat_completion(prompt, user=user)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)
        return resp

    def _summarize_responses(self, question: str, responses: List[str]) -> str:
        if len(responses) == 1:
            return responses[0]

        formatted_responses = "\n\n".join(
            f"Response for file/chunk {i + 1}:\n{response}"
            for i, response in enumerate(responses)
        )
        summary_prompt = SUMMARIZE_ANSWER_PROMPT.format(
            QUESTION=question, RESPONSES=formatted_responses
        )

        summarized_answer, usage = self.provider.chat_completion(summary_prompt)
        self.total_usage = self.provider.update_usage(self.total_usage, usage)

        return summarized_answer
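
To exercise QuestionAnswer without real LLM calls, the sketch below substitutes a stub provider implementing only the methods this class actually invokes (chat_completion, is_inside_token_limit, available_tokens, get_token_count, update_usage, get_usage_cost, plus the model and system_prompt attributes). The stub and its return values are assumptions for illustration, not part of the kaizen API:

from kaizen.reviewer.ask_question import QuestionAnswer

class StubProvider:
    # Minimal stand-in for LLMProvider; every return value here is canned.
    model = "stub-model"
    system_prompt = ""

    def chat_completion(self, prompt, user=None):
        usage = {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15}
        return "stub answer", usage

    def is_inside_token_limit(self, PROMPT):
        return True  # Force the full-diff path

    def available_tokens(self, prompt):
        return 1000

    def get_token_count(self, prompt):
        return len(prompt) // 4  # Crude token estimate

    def update_usage(self, total, usage):
        return {k: total.get(k, 0) + v for k, v in usage.items()}

    def get_usage_cost(self, total_usage):
        return 0.0, 0.0  # (prompt_cost, completion_cost)

qa = QuestionAnswer(llm_provider=StubProvider())
output = qa.ask_pull_request(
    diff_text="--- a/f.py\n+++ b/f.py\n@@ -1 +1 @@\n-old\n+new",
    pull_request_title="demo",
    pull_request_desc="",
    question="What changed?",
    pull_request_files=[],
)
print(output.answer, output.usage, output.cost)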
