Skip to content

Commit

Permalink
fix log probs
Browse files Browse the repository at this point in the history
  • Loading branch information
penguine-ip committed Mar 10, 2025
1 parent f832a9a commit 6550550
Show file tree
Hide file tree
Showing 4 changed files with 257 additions and 287 deletions.
43 changes: 19 additions & 24 deletions deepeval/metrics/g_eval/g_eval.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""LLM evaluated metric based on the GEval framework: https://arxiv.org/pdf/2303.16634.pdf"""

from typing import Optional, List, Tuple, Union, Dict
from pydantic import BaseModel
from langchain.schema import AIMessage
import math
from deepeval.metrics import BaseMetric
Expand All @@ -11,6 +10,7 @@
ConversationalTestCase,
ToolCall,
)

from deepeval.metrics.g_eval.template import GEvalTemplate
from deepeval.utils import get_or_create_event_loop, prettify_list
from deepeval.metrics.utils import (
Expand All @@ -20,6 +20,7 @@
initialize_model,
)
from deepeval.models import DeepEvalBaseLLM, GPTModel
from deepeval.models.gpt_model import unsupported_log_probs_gpt_models
from deepeval.metrics.indicator import metric_progress_indicator
from deepeval.metrics.g_eval.schema import *

Expand All @@ -34,6 +35,19 @@
}


def no_log_prob_support(model: Union[str, DeepEvalBaseLLM]) -> bool:
    """Return True if `model` is a GPT model without log-probability support.

    Args:
        model: Either a model-name string or a DeepEvalBaseLLM instance.
            Only plain strings and `GPTModel` instances are checked; any
            other `DeepEvalBaseLLM` subclass returns False.

    Returns:
        True when the model's name appears in
        `unsupported_log_probs_gpt_models`, False otherwise.
    """
    if isinstance(model, str):
        return model in unsupported_log_probs_gpt_models
    if isinstance(model, GPTModel):
        return model.model_name in unsupported_log_probs_gpt_models
    return False


def construct_g_eval_params_string(
llm_test_case_params: List[LLMTestCaseParams],
):
Expand Down Expand Up @@ -246,18 +260,8 @@ async def _a_evaluate(

try:
# don't use log probabilities for unsupported gpt models
unsupported_gpt_models = {
"o1",
"o1-preview",
"o1-2024-12-17",
"o3-mini",
"o3-mini-2025-01-31",
}
if isinstance(self.model, str) and self.model in unsupported_gpt_models:
raise AttributeError(f"Model {self.model} is unsupported.")
if isinstance(self.model, GPTModel) and self.model.model_name in unsupported_gpt_models:
raise AttributeError(f"Model {self.model.model} is unsupported.")

if no_log_prob_support(self.model):
raise AttributeError("log_probs unsupported.")

# Don't have to check for using native model
# since generate raw response only exist for deepeval's native model
Expand Down Expand Up @@ -325,17 +329,8 @@ def evaluate(self, test_case: LLMTestCase) -> Tuple[Union[int, float], str]:

try:
# don't use log probabilities for unsupported gpt models
unsupported_gpt_models = {
"o1",
"o1-preview",
"o1-2024-12-17",
"o3-mini",
"o3-mini-2025-01-31",
}
if isinstance(self.model, str) and self.model in unsupported_gpt_models:
raise AttributeError(f"Model {self.model} is unsupported.")
if isinstance(self.model, GPTModel) and self.model.model in unsupported_gpt_models:
raise AttributeError(f"Model {self.model.model} is unsupported.")
if no_log_prob_support(self.model):
raise AttributeError("log_probs unsupported.")

res, cost = self.model.generate_raw_response(
prompt, logprobs=True, top_logprobs=self.top_logprobs
Expand Down
14 changes: 14 additions & 0 deletions deepeval/models/gpt_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ def log_retry_error(retry_state):
"o1",
"o1-preview",
"o1-2024-12-17",
"o1-preview-2024-09-12",
"o1-mini",
"o1-mini-2024-09-12",
"o3-mini",
"o3-mini-2025-01-31",
]

# OpenAI "o"-series model names that cannot be called with the `logprobs`
# parameter. `no_log_prob_support` in g_eval/g_eval.py checks against this
# list so G-Eval can skip requesting log probabilities for these models.
# NOTE(review): the entries duplicate the list defined just above — consider
# deriving one from the other to keep the two in sync.
unsupported_log_probs_gpt_models = [
    "o1",
    "o1-preview",
    "o1-2024-12-17",
    "o1-preview-2024-09-12",
    "o1-mini",
    "o1-mini-2024-09-12",
    "o3-mini",
    "o3-mini-2025-01-31",
]
Expand Down
Loading

0 comments on commit 6550550

Please sign in to comment.