Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -367,13 +367,26 @@ More details can be found in the [lm_eval](https://github.com/EleutherAI/lm-eval

Please note, for tasks such as NER, the automated evaluation is based on a specific pattern. This might fail to extract relevant information in zero-shot settings, resulting in relatively lower performance compared to previous human-annotated results.

**OpenAI**

```bash
export OPENAI_API_SECRET_KEY=YOUR_KEY_HERE
python eval.py \
--model gpt-4 \
--tasks flare_ner,flare_sm_acl,flare_fpb
```

**MiniMax**

[MiniMax](https://www.minimaxi.com/) provides large language models (M2.7, M2.5) with 204K-token context windows via an OpenAI-compatible API. Supported models: `MiniMax-M2.7`, `MiniMax-M2.7-highspeed`, `MiniMax-M2.5`, `MiniMax-M2.5-highspeed`.

```bash
export MINIMAX_API_KEY=YOUR_MINIMAX_KEY_HERE
python eval.py \
--model MiniMax-M2.7 \
--tasks flare_ner,flare_sm_acl,flare_fpb
```

3. Self-Hosted Evaluation

To run inference backend:
Expand Down
19 changes: 12 additions & 7 deletions src/chatlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ async def single_chat(client, **kwargs):


async def oa_completion(**kwargs):
"""Query OpenAI API for completion.
"""Query OpenAI-compatible API for completion.

Retry with back-off until they respond
"""
Expand All @@ -50,6 +50,10 @@ async def oa_completion(**kwargs):
class ChatLM(BaseLM):
REQ_CHUNK_SIZE = 20

# Default API configuration (OpenAI)
API_BASE_URL = "https://api.openai.com/v1/chat/completions"
API_KEY_ENV = "OPENAI_API_SECRET_KEY"

def __init__(self, model, truncate=False):
"""

Expand All @@ -59,12 +63,9 @@ def __init__(self, model, truncate=False):
"""
super().__init__()

import openai

self.model = model
self.truncate = truncate
# Read from environment variable OPENAI_API_SECRET_KEY
api_key = os.environ["OPENAI_API_SECRET_KEY"]
api_key = os.environ[self.API_KEY_ENV]
self.tokenizer = transformers.GPT2TokenizerFast.from_pretrained("gpt2")
self.headers = {
"Content-Type": "application/json",
Expand Down Expand Up @@ -136,12 +137,12 @@ def sameuntil_chunks(xs, size):
inps.append(context[0])

responses = asyncio.run(oa_completion(
url="https://api.openai.com/v1/chat/completions",
url=self.API_BASE_URL,
headers=self.headers,
model=self.model,
messages=[{"role": "user", "content": inp} for inp in inps],
max_tokens=self.max_gen_toks,
temperature=0.0,
temperature=self._get_temperature(0.0),
# stop=until,
))

Expand All @@ -155,6 +156,10 @@ def sameuntil_chunks(xs, size):

return re_ord.get_original(res)

def _get_temperature(self, temperature):
"""Return a valid temperature value for this provider."""
return temperature

def _model_call(self, inps):
# Isn't used because we override _loglikelihood_tokens
raise NotImplementedError()
Expand Down
5 changes: 4 additions & 1 deletion src/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

from model_prompt import MODEL_PROMPT_MAP
from chatlm import ChatLM
from minimax_lm import MiniMaxLM, MINIMAX_MODELS
import tasks as ta

@positional_deprecated
Expand Down Expand Up @@ -74,7 +75,9 @@ def simple_evaluate(
if isinstance(model, str):
if model_args is None:
model_args = ""
if model[:3] != "gpt":
if model in MINIMAX_MODELS:
lm = MiniMaxLM(model)
elif model[:3] != "gpt":
lm = lm_eval.models.get_model(model).create_from_arg_string(
model_args, {"batch_size": batch_size, "max_batch_size": max_batch_size, "device": device}
)
Expand Down
27 changes: 27 additions & 0 deletions src/factscore_package/minimax_lm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from .openai_lm import OpenAIModel
import os


class MiniMaxModel(OpenAIModel):
    """MiniMax LLM for FActScore evaluation.

    Routes requests through the parent ``OpenAIModel`` machinery but
    points the client at MiniMax's OpenAI-compatible endpoint and reads
    the key from the ``MINIMAX_API_KEY`` environment variable.
    """

    def __init__(self, model_name="MiniMax-M2.7", cache_file=None):
        super().__init__(
            model_name=model_name,
            cache_file=cache_file,
            key=os.environ.get("MINIMAX_API_KEY", ""),
            api_base="https://api.minimax.io/v1",
        )
        # MiniMax requires temperature in (0.0, 1.0]
        self.temp = 0.7

    def _generate(self, prompt, max_sequence_length=2048, max_output_length=128):
        """Send a single-turn user prompt and return (text, raw response).

        NOTE(review): ``max_output_length`` is accepted but never passed
        through — presumably mirroring the parent's signature; confirm
        against OpenAIModel._generate.
        """
        # Persist the response cache periodically, matching the parent's
        # save cadence.
        if self.add_n % self.save_interval == 0:
            self.save_cache()
        chat_messages = [{"role": "user", "content": prompt}]
        response = self.call_ChatGPT(
            chat_messages,
            model_name=self.model_name,
            temp=self.temp,
            max_len=max_sequence_length,
        )
        return response.choices[0].message.content, response
9 changes: 5 additions & 4 deletions src/factscore_package/openai_lm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
import numpy as np
import logging

#os.environ["http_proxy"] = "http://localhost:27890"
#os.environ["https_proxy"] = "http://localhost:27890"

class OpenAIModel(LM):

def __init__(self, model_name, cache_file=None, key=""):
def __init__(self, model_name, cache_file=None, key="", api_base=None):
    """Create an OpenAI-compatible chat model wrapper.

    Args:
        model_name: Name of the model to query.
        cache_file: Optional path for the response cache (handled by the
            ``LM`` base class).
        key: API key for the endpoint.
        api_base: Optional base URL of an OpenAI-compatible endpoint
            (e.g. MiniMax). When omitted, the client uses the official
            OpenAI API.
    """
    self.model_name = model_name
    self.temp = 0.7
    self.save_interval = 100
    # Build the client exactly once; only override base_url when an
    # alternate endpoint is requested so the OpenAI default is kept
    # otherwise. (The pasted diff also showed the pre-change
    # construction `OpenAI(api_key=key.strip())` — that duplicate is
    # removed here.)
    client_kwargs = {"api_key": key.strip()}
    if api_base:
        client_kwargs["base_url"] = api_base
    self.client = OpenAI(**client_kwargs)
    super().__init__(cache_file)

def load_model(self):
Expand Down
34 changes: 34 additions & 0 deletions src/minimax_lm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
from chatlm import ChatLM


# MiniMax models, each mapped to its context window size in tokens.
# All currently supported models share the same 204K-token window.
MINIMAX_MODELS = {
    name: 204800
    for name in (
        "MiniMax-M2.7",
        "MiniMax-M2.7-highspeed",
        "MiniMax-M2.5",
        "MiniMax-M2.5-highspeed",
    )
}


class MiniMaxLM(ChatLM):
    """ChatLM variant targeting MiniMax's OpenAI-compatible API.

    MiniMax exposes a chat completions endpoint compatible with the
    OpenAI wire format at https://api.minimax.io/v1/chat/completions,
    so this class only swaps the endpoint, the API-key environment
    variable, the context window, and the temperature range.

    Environment variable: MINIMAX_API_KEY
    """

    API_BASE_URL = "https://api.minimax.io/v1/chat/completions"
    API_KEY_ENV = "MINIMAX_API_KEY"

    @property
    def max_length(self):
        """Context window size (tokens) for the configured model."""
        try:
            return MINIMAX_MODELS[self.model]
        except KeyError:
            # Unknown model name: fall back to the shared 204K window.
            return 204800

    def _get_temperature(self, temperature):
        """Clamp the temperature into MiniMax's accepted range (0.0, 1.0]."""
        return 0.01 if temperature <= 0.0 else min(temperature, 1.0)
Loading