Skip to content

Commit

Permalink
Merge pull request #227 from Cloud-Code-AI/226-add-double-parsing-for…
Browse files Browse the repository at this point in the history
…-json

fix: added llm based json parsing handling
  • Loading branch information
sauravpanda authored Jun 21, 2024
2 parents 961f039 + 6f34531 commit b4b9799
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 27 deletions.
6 changes: 1 addition & 5 deletions kaizen/generator/ui.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,7 @@ def generate_module_tests(self, web_content: str, test_modules: dict, web_url: s
This method generates UI testing points for all modules.
"""
ui_tests = test_modules
total_usage = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
}
total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
for module in ui_tests:
for test in module["tests"]:
test_description = test["test_description"]
Expand Down
13 changes: 13 additions & 0 deletions kaizen/helpers/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ def extract_json(text):
return parsed_data


def extract_json_with_llm_retry(provider, text, total_usage):
    """Parse JSON out of *text*, with an LLM-assisted repair fallback.

    First tries a direct parse via ``extract_json``. If that parse raises,
    the raw text is sent back to *provider* with a fix-it prompt, the token
    usage counters are folded into *total_usage*, and the repaired response
    is parsed instead.

    Returns a ``(json_data, total_usage)`` tuple. If the repaired response
    still fails to parse, the second ``extract_json`` error propagates.
    """
    try:
        # Happy path: the model already produced valid JSON.
        return extract_json(text), total_usage
    except Exception as err:
        print(f"Error parsing json: {err}")
        fix_prompt = f"Help me fix this json data: {text}"
        repaired, usage = provider.chat_completion(fix_prompt)
        total_usage = provider.update_usage(total_usage, usage)
        return extract_json(repaired), total_usage


def extract_multi_json(text):
start_index = text.find("[")
end_index = text.rfind("]") + 1
Expand Down
2 changes: 1 addition & 1 deletion kaizen/llms/prompts/work_summary_prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,4 +39,4 @@
7. Maintain a consistent, readable tone.
PATCH DATA: {PATCH_DATA}
"""
"""
38 changes: 22 additions & 16 deletions kaizen/reviewer/code_review.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,15 +63,13 @@ def review_pull_request(
PULL_REQUEST_DESC=pull_request_desc,
CODE_DIFF=diff_text,
)
total_usage = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
}
total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
if self.provider.is_inside_token_limit(PROMPT=prompt):
self.logger.debug("Processing Directly from Diff")
resp, usage = self.provider.chat_completion(prompt, user=user)
review_json = parser.extract_json(resp)
review_json, total_usage = parser.extract_json_with_llm_retry(
self.provider, resp, total_usage
)
reviews = review_json["review"]
total_usage = self.provider.update_usage(total_usage, usage)
else:
Expand All @@ -90,12 +88,16 @@ def review_pull_request(
PULL_REQUEST_DESC=pull_request_desc,
FILE_PATCH=patch_details,
)
if not self.provider.is_inside_token_limit(PROMPT=prompt, percentage=85):
if not self.provider.is_inside_token_limit(
PROMPT=prompt, percentage=85
):
# TODO: Chunk this big files and process them
continue
resp, usage = self.provider.chat_completion(prompt, user=user)
total_usage = self.provider.update_usage(total_usage, usage)
review_json = parser.extract_json(resp)
review_json, total_usage = parser.extract_json_with_llm_retry(
self.provider, resp, total_usage
)
reviews.extend(review_json["review"])

topics = self.merge_topics(reviews=reviews)
Expand Down Expand Up @@ -128,16 +130,15 @@ def generate_pull_request_desc(
CODE_DIFF=diff_text,
)

total_usage = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
}
total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
if self.provider.is_inside_token_limit(PROMPT=prompt):
self.logger.debug("Processing Directly from Diff")
resp, usage = self.provider.chat_completion(prompt, user=user)
total_usage = self.provider.update_usage(total_usage, usage)
desc = parser.extract_json(resp)["desc"]
json_data = parser.extract_json_with_llm_retry(
self.provider, resp, total_usage
)
desc = json_data["desc"]
else:
self.logger.debug("Processing Based on files")
# We recurrsively get feedback for files and then get basic summary
Expand All @@ -159,13 +160,18 @@ def generate_pull_request_desc(
continue
resp, usage = self.provider.chat_completion(prompt, user=user)
total_usage = self.provider.update_usage(total_usage, usage)
desc_json = parser.extract_json(resp)
desc_json, total_usage = parser.extract_json_with_llm_retry(
self.provider, resp, total_usage
)
descs.append(desc_json["desc"])

prompt = MERGE_PR_DESCRIPTION_PROMPT.format(DESCS=json.dumps(descs))
resp, usage = self.provider.chat_completion(prompt, user=user)
total_usage = self.provider.update_usage(total_usage, usage)
desc = parser.extract_json(resp)["desc"]
resp, total_usage = parser.extract_json_with_llm_retry(
self.provider, resp, total_usage
)
desc = resp["desc"]
body = output.create_pr_description(desc, pull_request_desc)
prompt_cost, completion_cost = self.provider.get_usage_cost(
total_usage=total_usage
Expand Down
6 changes: 1 addition & 5 deletions kaizen/reviewer/work_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,7 @@ def generate_work_summaries(
summaries = []
# Try to merge the files untill LLM can process the response
combined_diff_data = ""
total_usage = {
"prompt_tokens": 0,
"completion_tokens": 0,
"total_tokens": 0
}
total_usage = {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
for file_dict in diff_file_data:
temp_prompt = combined_diff_data
temp_prompt += f"""\n---->\nFile Name: {file_dict["file"]}\nPatch: {file_dict["patch"]}\n Status: {file_dict["status"]}"""
Expand Down

0 comments on commit b4b9799

Please sign in to comment.