Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ dev = [
"pytest-mock>=3.12.0,<4.0.0",
"pytest-timeout>=2.3.1,<3.0.0",
"mypy>=1.7.1,<2.0.0",
"black>=25.1.0,<26.0.0",
"black>=25.1.0,<27.0.0",
"beautifulsoup4>=4.12.3,<5.0.0",
"flaky>=3.8.1,<4.0.0",
"types-beautifulsoup4>=4.12.0.20240229,<5.0.0",
Expand Down
1 change: 0 additions & 1 deletion src/modelbench/benchmark_runner_items.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
)
from modelgauge.sut import PromptResponseSUT, SUTResponse


# in their own file to solve circular import problems


Expand Down
1 change: 0 additions & 1 deletion src/modelgauge/annotators/cheval/annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from modelgauge.secret_values import RequiredSecret, SecretDescription
from modelgauge.sut import SUTResponse


_CHEVAL_SCOPE = "cheval"


Expand Down
1 change: 0 additions & 1 deletion src/modelgauge/annotators/cheval/registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
from modelgauge.annotators.cheval.ids import SAFETY_ANNOTATOR_V1_1_UID, SECURITY_ANNOTATOR_V0_5_UID
from modelgauge.secret_values import InjectSecret


ANNOTATORS.register(
ChevalAnnotator,
SAFETY_ANNOTATOR_V1_1_UID,
Expand Down
1 change: 0 additions & 1 deletion src/modelgauge/data_schema.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from abc import ABC, abstractmethod
from typing import Optional


# The first value is the preferred name.
PROMPT_UID_COLS = ["prompt_uid", "release_prompt_id"]
PROMPT_TEXT_COLS = ["prompt_text"]
Expand Down
1 change: 0 additions & 1 deletion src/modelgauge/ensemble_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

from modelgauge.annotation import SafetyAnnotation


T = TypeVar("T", bound=SafetyAnnotation)


Expand Down
4 changes: 2 additions & 2 deletions src/modelgauge/suts/azure_client.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from abc import ABC, abstractmethod
from typing import List, Optional

import requests # type:ignore
import requests # type: ignore
from pydantic import BaseModel
from requests.adapters import HTTPAdapter, Retry # type:ignore
from requests.adapters import HTTPAdapter, Retry # type: ignore

from modelgauge.general import APIException
from modelgauge.prompt import TextPrompt
Expand Down
4 changes: 2 additions & 2 deletions src/modelgauge/suts/meta_llama_client.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List, Optional

import requests # type:ignore
import requests # type: ignore
from httpx import Timeout
from llama_api_client import LlamaAPIClient
from llama_api_client.types import CreateChatCompletionResponse, MessageTextContentItem, ModerationCreateResponse
Expand All @@ -14,7 +14,7 @@
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS
from pydantic import BaseModel
from requests.adapters import HTTPAdapter, Retry # type:ignore
from requests.adapters import HTTPAdapter, Retry # type: ignore


class MetaLlamaApiKey(RequiredSecret):
Expand Down
1 change: 0 additions & 1 deletion src/modelgauge/suts/nvidia_nim_api_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from modelgauge.sut_decorator import modelgauge_sut
from modelgauge.sut_registry import SUTS


BASE_URL = "https://integrate.api.nvidia.com/v1"


Expand Down
4 changes: 2 additions & 2 deletions src/modelgauge/suts/together_client.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import time
from typing import List, Optional

import requests # type:ignore
import requests # type: ignore
from airrlogger.log_config import get_logger
from pydantic import BaseModel
from requests.adapters import HTTPAdapter, Retry # type:ignore
from requests.adapters import HTTPAdapter, Retry # type: ignore

from modelgauge.auth.together_key import TogetherApiKey
from modelgauge.general import APIException
Expand Down
1 change: 0 additions & 1 deletion tests/modelgauge_tests/sut_tests/test_baseten_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from modelgauge.prompt import TextPrompt
from modelgauge.typed_data import is_typeable


FAKE_MODEL_NAME = "xyzzy"


Expand Down
16 changes: 4 additions & 12 deletions tests/modelgauge_tests/sut_tests/test_google_genai.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,7 @@ def test_google_genai_translate_response_finish_reason_other(google_default_sut,


def test_google_genai_translate_response_no_completions(google_default_sut, some_request):
no_completions = GenerateContentResponse(
**json.loads(
"""{
no_completions = GenerateContentResponse(**json.loads("""{
"candidates": [],
"usage_metadata": {
"prompt_token_count": 19,
Expand All @@ -176,18 +174,14 @@ def test_google_genai_translate_response_no_completions(google_default_sut, some
"candidates_token_count": 0
}
}
"""
)
)
"""))
response = google_default_sut.translate_response(some_request, no_completions)

assert response == SUTResponse(text=REFUSAL_RESPONSE)


def test_google_genai_translate_response_none_completions(google_default_sut, some_request):
no_completions = GenerateContentResponse(
**json.loads(
"""{
no_completions = GenerateContentResponse(**json.loads("""{
"candidates": null,
"usage_metadata": {
"prompt_token_count": 19,
Expand All @@ -196,9 +190,7 @@ def test_google_genai_translate_response_none_completions(google_default_sut, so
"candidates_token_count": 0
}
}
"""
)
)
"""))
response = google_default_sut.translate_response(some_request, no_completions)

assert response == SUTResponse(text=REFUSAL_RESPONSE)
2 changes: 1 addition & 1 deletion tests/modelgauge_tests/sut_tests/test_meta_llama.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from modelgauge.model_options import ModelOptions
from modelgauge.suts.meta_llama_client import InputMessage, MetaLlamaApiKey, MetaLlamaChatRequest, MetaLlamaSUT
from pytest import fixture
from requests import HTTPError # type:ignore
from requests import HTTPError # type: ignore

llama_chat_response_text = """
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ def test_openai_chat_translate_response():
messages=[],
)
# response is based on the OpenAI request: https://platform.openai.com/docs/api-reference/chat/create
response = ChatCompletion.model_validate_json(
"""\
response = ChatCompletion.model_validate_json("""\
{
"id": "chatcmpl-123",
"object": "chat.completion",
Expand All @@ -53,7 +52,6 @@ def test_openai_chat_translate_response():
"total_tokens": 21
}
}
"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(text="Hello there, how may I assist you today?", top_logprobs=None)
14 changes: 5 additions & 9 deletions tests/modelgauge_tests/sut_tests/test_openai_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_openai_constructor():
with_client = OpenAIChat(
uid="test-model",
model="some-model",
client=client, # type:ignore
client=client, # type: ignore
)

# these should all fail
Expand Down Expand Up @@ -114,8 +114,7 @@ def test_openai_chat_translate_response():
messages=[],
)
# Pulled from https://platform.openai.com/docs/api-reference/chat/create
response = ChatCompletion.model_validate_json(
"""\
response = ChatCompletion.model_validate_json("""\
{
"id": "chatcmpl-123",
"object": "chat.completion",
Expand All @@ -137,8 +136,7 @@ def test_openai_chat_translate_response():
"total_tokens": 21
}
}
"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(text="Hello there, how may I assist you today?", top_logprobs=None)

Expand All @@ -151,8 +149,7 @@ def test_openai_chat_translate_response_logprobs():
logprobs=True,
)
# Copied from a real response.
response = ChatCompletion.model_validate_json(
"""\
response = ChatCompletion.model_validate_json("""\
{
"id": "made-this-fake",
"choices": [
Expand Down Expand Up @@ -234,8 +231,7 @@ def test_openai_chat_translate_response_logprobs():
"total_tokens": 11
}
}
"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(
text="Hello!",
Expand Down
20 changes: 7 additions & 13 deletions tests/modelgauge_tests/sut_tests/test_together_client.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from unittest.mock import patch, MagicMock

import pytest
from requests import HTTPError # type:ignore
from requests import HTTPError # type: ignore
import json

from modelgauge.general import APIException
Expand Down Expand Up @@ -186,8 +186,7 @@ def test_together_completions_translate_response():
prompt="My favorite colors are red and ",
max_tokens=2,
)
response = TogetherCompletionsResponse.model_validate_json(
"""\
response = TogetherCompletionsResponse.model_validate_json("""\
{
"id": "87cc221c3b411064-ORD",
"object": "text.completion",
Expand All @@ -209,8 +208,7 @@ def test_together_completions_translate_response():
}
}

"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(text=" blue.", top_logprobs=None)

Expand All @@ -223,8 +221,7 @@ def test_together_completions_translate_response_logprobs():
max_tokens=2,
logprobs=1,
)
response = TogetherCompletionsResponse.model_validate_json(
"""\
response = TogetherCompletionsResponse.model_validate_json("""\
{
"id": "87cc221c3b411064-ORD",
"object": "text.completion",
Expand Down Expand Up @@ -259,8 +256,7 @@ def test_together_completions_translate_response_logprobs():
"total_tokens": 10
}
}
"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(
text=" blue.",
Expand Down Expand Up @@ -302,8 +298,7 @@ def test_together_chat_translate_response_logprobs():
max_tokens=2,
logprobs=1,
)
response = TogetherChatResponse.model_validate_json(
"""\
response = TogetherChatResponse.model_validate_json("""\
{
"id": "87ca703b9c6710af-ORD",
"object": "chat.completion",
Expand Down Expand Up @@ -332,8 +327,7 @@ def test_together_chat_translate_response_logprobs():
"total_tokens": 7
}
}
"""
)
""")
result = client.translate_response(request, response)
assert result == SUTResponse(
text="Some response",
Expand Down
1 change: 0 additions & 1 deletion tests/modelgauge_tests/test_annotation_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
from modelgauge_tests.fake_ensemble_strategy import FakeEnsembleStrategy
from modelgauge_tests.test_prompt_pipeline import FakePromptInput


PROMPT_RESPONSE_SCHEMA = PromptResponseSchema.default()


Expand Down
16 changes: 4 additions & 12 deletions tests/modelgauge_tests/test_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,12 +72,10 @@ def test_load_secrets_works_with_file_path(tmpdir):
os.makedirs(config_dir)
secrets_file = config_dir.join("secrets.toml")
with open(secrets_file, "w") as f:
f.write(
"""\
f.write("""\
[scope]
api_key = "12345"
"""
)
""")
secrets = load_secrets_from_config(secrets_file)
assert secrets == {"scope": {"api_key": "12345"}}

Expand Down Expand Up @@ -113,15 +111,12 @@ def test_raise_if_missing_from_config_single():
raise_if_missing_from_config([missing], config_path="some/path.toml")

absolute_path = str(pathlib.Path("some/path.toml").absolute())
assert (
str(err_info.value)
== f"""\
assert str(err_info.value) == f"""\
To perform this run you need to add the following values to your secrets file '{absolute_path}':
[some-scope]
# some-instructions
some-key="<value>"
"""
)


def test_raise_if_missing_from_config_combines():
Expand All @@ -139,9 +134,7 @@ def test_raise_if_missing_from_config_combines():

absolute_path = str(pathlib.Path("some/path.toml").absolute())

assert (
str(err_info.value)
== f"""\
assert str(err_info.value) == f"""\
To perform this run you need to add the following values to your secrets file '{absolute_path}':
[scope1]
# instructions1
Expand All @@ -153,4 +146,3 @@ def test_raise_if_missing_from_config_combines():
# instructions3
key1="<value>"
"""
)
1 change: 0 additions & 1 deletion tests/modelgauge_tests/test_external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@

from tenacity import wait_none


WebDataMockResponse = namedtuple("WebDataMockResponse", ("ok", "content"))
GDriveFileToDownload = namedtuple("GDriveFileToDownload", ("id", "path"))

Expand Down
Loading
Loading