From 3a4a545d353c5a237ca7a573042ca58b013afe8c Mon Sep 17 00:00:00 2001 From: abdimk Date: Mon, 23 Mar 2026 15:15:29 +0300 Subject: [PATCH 1/2] fix: add missing project metadata to pyproject.toml --- pyproject.toml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 361d2a5..eb4dbf0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,8 @@ +[project] +name = "llmlingua" +version = "0.2.2" +dynamic = ["description", "readme", "requires-python", "license", "authors", "keywords", "classifiers", "dependencies", "optional-dependencies"] + [tool.black] line-length = 88 target-version = ['py38'] @@ -8,4 +13,4 @@ atomic = true profile = "black" line_length = 88 skip_gitignore = true -known_first_party = ["llmlingua"] +known_first_party = ["llmlingua"] \ No newline at end of file From f598dde113b590d9b5e07db903b29efdc0ce839e Mon Sep 17 00:00:00 2001 From: abdimk Date: Mon, 23 Mar 2026 15:58:44 +0300 Subject: [PATCH 2/2] fix: resolve ruff lint errors --- examples/CoT.ipynb | 22 ++++----- examples/LLMLingua2.ipynb | 10 ++-- examples/Retrieval.ipynb | 2 +- experiments/llmlingua2/evaluation/eval_bbh.py | 2 +- .../llmlingua2/evaluation/eval_gsm8k.py | 26 +++++----- .../llmlingua2/evaluation/eval_longbench.py | 4 +- experiments/llmlingua2/evaluation/metrics.py | 6 +-- .../model_training/train_roberta.py | 5 +- experiments/securitylingua/filter.py | 2 +- experiments/securitylingua/label_word.py | 1 - experiments/securitylingua/train_roberta.py | 5 +- llmlingua/prompt_compressor.py | 49 +++++++++---------- tests/test_llmlingua.py | 2 +- tests/test_longllmlingua.py | 2 +- 14 files changed, 62 insertions(+), 76 deletions(-) diff --git a/examples/CoT.ipynb b/examples/CoT.ipynb index 336fed8..8758540 100644 --- a/examples/CoT.ipynb +++ b/examples/CoT.ipynb @@ -449,9 +449,9 @@ " questions = []\n", " ans_pred = []\n", " ans_gold = []\n", - " for l in lines:\n", - " l = l.replace(\",\", \"\")\n", - " if l.startswith(\"Q: \"):\n", + " for line in lines:\n", + " line = line.replace(\",\", \"\")\n", + " if line.startswith(\"Q: \"):\n", " if am is not None and a is not None:\n", " questions.append(q)\n", " ans_pred.append(am)\n", @@ -459,21 +459,21 @@ " if test_answer(am, a):\n", " acc += 1\n", " current_mode = \"q\"\n", - " q = l\n", + " q = line\n", " num_q += 1\n", - " elif l.startswith(\"A_model:\"):\n", + " elif line.startswith(\"A_model:\"):\n", " current_mode = \"am\"\n", - " am = l\n", - " elif l.startswith(\"A:\"):\n", + " am = line\n", + " elif line.startswith(\"A:\"):\n", " current_mode = \"a\"\n", - " a = l\n", + " a = line\n", " else:\n", " if current_mode == \"q\":\n", - " q += l\n", + " q += line\n", " elif current_mode == \"am\":\n", - " am += l\n", + " am += line\n", " elif current_mode == \"a\":\n", - " a += l\n", + " a += line\n", " else:\n", " raise ValueError(current_mode)\n", "\n", diff --git a/examples/LLMLingua2.ipynb b/examples/LLMLingua2.ipynb index f4bf210..ceddb0e 100644 --- a/examples/LLMLingua2.ipynb +++ b/examples/LLMLingua2.ipynb @@ -67,12 +67,12 @@ "openai.api_key = \"\"\n", "\n", "# or Using the AOAI\n", - "import openai\n", + "# import openai\n", "\n", - "openai.api_key = \"\"\n", - "openai.api_base = \"\"\n", - "openai.api_type = \"azure\"\n", - "openai.api_version = \"2023-05-15\"" + "# openai.api_key = \"\"\n", + "# openai.api_base = \"\"\n", + "# openai.api_type = \"azure\"\n", + "# openai.api_version = \"2023-05-15\"" ] }, { diff --git a/examples/Retrieval.ipynb b/examples/Retrieval.ipynb index b960a23..b44a732 100644 --- a/examples/Retrieval.ipynb +++ b/examples/Retrieval.ipynb @@ -199,7 +199,7 @@ " )\n", "\n", " c = prompt.split(\"\\n\\n\")\n", - " instruction, question = c[0], c[-1]\n", + " question = c[-1]\n", " demonstration = \"\\n\".join(c[1:-1])\n", " corpus = demonstration.split(\"\\n\")\n", "\n", diff --git a/experiments/llmlingua2/evaluation/eval_bbh.py b/experiments/llmlingua2/evaluation/eval_bbh.py index 96fb319..f49e8ec 100644 --- a/experiments/llmlingua2/evaluation/eval_bbh.py +++ b/experiments/llmlingua2/evaluation/eval_bbh.py @@ -246,7 +246,7 @@ def predict(): results[idx] = {"question": q, "model_answer": answer, "truth_answer": a} json.dump(results, open(args.save_path, "w"), indent=4) - ans_ = extract_ans(answer, task_type) + extract_ans(answer, task_type) if task_type == "multiple_choice": a = a[1] res = "%dTask:%s\n%s\nA_model:%s\nA_target:%s\n\n" % ( diff --git a/experiments/llmlingua2/evaluation/eval_gsm8k.py b/experiments/llmlingua2/evaluation/eval_gsm8k.py index 259f686..71797ef 100644 --- a/experiments/llmlingua2/evaluation/eval_gsm8k.py +++ b/experiments/llmlingua2/evaluation/eval_gsm8k.py @@ -56,37 +56,37 @@ def extract_ans(ans_model): def parse_pred_ans(filename): with open(filename) as fd: lines = fd.readlines() - am, a = None, None + am, a, q = None, None, None num_q, acc = 0, 0 current_mode = "none" questions = [] ans_pred = [] ans_gold = [] - for l in lines: - l = l.replace(",", "") - if l.startswith("Q: "): - if am is not None and a is not None: + for line in lines: + line = line.replace(",", "") + if line.startswith("Q: "): + if am is not None and a is not None and q is not None: questions.append(q) ans_pred.append(am) ans_gold.append(a) if test_answer(am, a): acc += 1 current_mode = "q" - q = l + q = line num_q += 1 - elif l.startswith("A_model:"): + elif line.startswith("A_model:"): current_mode = "am" - am = l - elif l.startswith("A:"): + am = line + elif line.startswith("A:"): current_mode = "a" - a = l + a = line else: if current_mode == "q": - q += l + q += line elif current_mode == "am": - am += l + am += line elif current_mode == "a": - a += l + a += line else: raise ValueError(current_mode) diff --git a/experiments/llmlingua2/evaluation/eval_longbench.py b/experiments/llmlingua2/evaluation/eval_longbench.py index 467d904..39161aa 100644 --- a/experiments/llmlingua2/evaluation/eval_longbench.py +++ b/experiments/llmlingua2/evaluation/eval_longbench.py @@ -188,10 +188,9 @@ def eval(load_path): lengths[data["task"]].append(data["length"]) scores = {} for task in predictions.keys(): - pred_list, ans_list, length_list = ( + pred_list, ans_list = ( predictions[task], answers[task], - lengths[task], ) score = scorer(task, pred_list, ans_list, all_classes[task]) print(score) @@ -272,7 +271,6 @@ def predict(): for sample in tqdm(dataset): idx = int(sample["idx"]) - task = sample["task"] if idx in results or str(idx) in results: print(f"{idx} processed") continue diff --git a/experiments/llmlingua2/evaluation/metrics.py b/experiments/llmlingua2/evaluation/metrics.py index c4aa87a..c5bec63 100644 --- a/experiments/llmlingua2/evaluation/metrics.py +++ b/experiments/llmlingua2/evaluation/metrics.py @@ -2,6 +2,7 @@ # Licensed under The MIT License [see LICENSE for details] import re +import regex import string from collections import Counter from typing import List @@ -114,7 +115,7 @@ def rouge_score(prediction, ground_truth, **kwargs): rouge = Rouge() try: scores = rouge.get_scores([prediction], [ground_truth], avg=True) - except: + except Exception: return 0.0 return scores["rouge-l"]["f"] @@ -166,9 +167,6 @@ def qa_score(prediction, ground_truths): return 0.0 -import regex - - def normalize_answer2(s: str) -> str: """Normalization from the SQuAD evaluation script. diff --git a/experiments/llmlingua2/model_training/train_roberta.py b/experiments/llmlingua2/model_training/train_roberta.py index a5ae084..fc1e925 100644 --- a/experiments/llmlingua2/model_training/train_roberta.py +++ b/experiments/llmlingua2/model_training/train_roberta.py @@ -60,7 +60,7 @@ def train(epoch): model.train() for idx, batch in enumerate(train_dataloader): - t = time.time() + time.time() ids = batch["ids"].to(device, dtype=torch.long) mask = batch["mask"].to(device, dtype=torch.long) targets = batch["targets"].to(device, dtype=torch.long) @@ -149,8 +149,7 @@ def test(model, eval_dataloader): ) eval_accuracy += tmp_eval_accuracy - labels = [label.item() for label in eval_labels] - predictions = [pred.item() for pred in eval_preds] + predictions = [pred.item() for pred in eval_preds] eval_loss = eval_loss / nb_eval_steps eval_accuracy = eval_accuracy / nb_eval_steps diff --git a/experiments/securitylingua/filter.py b/experiments/securitylingua/filter.py index a3ddcb1..8fc5cc8 100644 --- a/experiments/securitylingua/filter.py +++ b/experiments/securitylingua/filter.py @@ -3,7 +3,7 @@ import argparse from collections import defaultdict -from typing import Dict, List, Tuple, DefaultDict +from typing import List, Tuple, DefaultDict import numpy as np import torch diff --git a/experiments/securitylingua/label_word.py b/experiments/securitylingua/label_word.py index 94c9b4c..d319db5 100644 --- a/experiments/securitylingua/label_word.py +++ b/experiments/securitylingua/label_word.py @@ -12,7 +12,6 @@ import torch from tqdm import tqdm from multiprocessing import Pool -import multiprocessing def setup_logging(save_path: str) -> logging.Logger: """Setup logging configuration""" diff --git a/experiments/securitylingua/train_roberta.py b/experiments/securitylingua/train_roberta.py index b7ce4e4..2700f8d 100644 --- a/experiments/securitylingua/train_roberta.py +++ b/experiments/securitylingua/train_roberta.py @@ -4,14 +4,11 @@ import argparse import os import random -import time -from typing import List, Tuple, Dict +from typing import List, Tuple import torch from sklearn.metrics import accuracy_score -from torch import cuda from torch.utils.data import DataLoader -from torch.utils.tensorboard import SummaryWriter from tqdm import tqdm from transformers import AutoModelForTokenClassification, AutoTokenizer from utils import TokenClfDataset diff --git a/llmlingua/prompt_compressor.py b/llmlingua/prompt_compressor.py index 84e390e..035572c 100644 --- a/llmlingua/prompt_compressor.py +++ b/llmlingua/prompt_compressor.py @@ -5,7 +5,6 @@ import copy import json import re -import string from collections import defaultdict from typing import List, Union @@ -796,7 +795,7 @@ def compress_prompt_llmlingua2( chunk_end_tokens.append(token_map[c]) chunk_end_tokens = set(chunk_end_tokens) - if type(context) == str: + if isinstance(context, str): context = [context] context = copy.deepcopy(context) @@ -1221,10 +1220,10 @@ def control_context_budget( if reorder_context == "original": used = sorted(used) elif reorder_context == "two_stage": - l, r = [_ for idx, _ in enumerate(used) if idx % 2 == 0], [ + left, right = [_ for idx, _ in enumerate(used) if idx % 2 == 0], [ _ for idx, _ in enumerate(used) if idx % 2 == 1 ] - used = l + r[::-1] + used = left + right[::-1] if dynamic_context_compression_ratio > 0: N = len(used) @@ -1764,8 +1763,8 @@ def match_from_compressed(response_word): response_c[original_input_ids[idx]].append(idx) res, res_min, res_c = None, float("inf"), 1 n = len(response_input_ids) - for l in response_c[response_input_ids[0]]: - x, y, c = 0, l, 1 + for start_idx in response_c[response_input_ids[0]]: + x, y, c = 0, start_idx, 1 for x in range(1, n): idx = bisect.bisect_right(response_c[response_input_ids[x]], y) if ( @@ -1777,18 +1776,14 @@ def match_from_compressed(response_word): y = response_c[response_input_ids[x]][idx] if c > res_c: res_c = c - res_min = y - l + 1 - res = (l, y + 1) - elif c == res_c and y - l + 1 < res_min: - res_min = y - l + 1 - res = (l, y + 1) + res_min = y - start_idx + 1 + res = (start_idx, y + 1) + elif c == res_c and y - start_idx + 1 < res_min: + res_min = y - start_idx + 1 + res = (start_idx, y + 1) if res is None: return response_word - # while l > 0 and not self.tokenizer.convert_ids_to_tokens(original_input_ids[l]).startswith("_"): - # l -= 1 - # while r < M - 1 and not self.tokenizer.convert_ids_to_tokens(original_input_ids[l]).startswith("_"): - # l -= 1 return self.tokenizer.decode(original_input_ids[res[0] : res[1]]) response_words = response.split(" ") @@ -1798,21 +1793,21 @@ def match_from_compressed(response_word): ] N, M = len(response_words), len(original_input_ids) recovered_response_words = [] - l = 0 - while l < N: - if response_words[l] not in compressed_prompt: - recovered_response_words.append(response_words[l]) - l += 1 + left = 0 + while left < N: + if response_words[left] not in compressed_prompt: + recovered_response_words.append(response_words[left]) + left += 1 continue - r = l + right = left while ( - r + 1 < N and " ".join(response_words[l : r + 2]) in compressed_prompt + right + 1 < N and " ".join(response_words[left : right + 2]) in compressed_prompt ): - r += 1 + right += 1 - match_words = match_from_compressed(" ".join(response_words[l : r + 1])) + match_words = match_from_compressed(" ".join(response_words[left : right + 1])) recovered_response_words.append(match_words) - l = r + 1 + left = right + 1 return " ".join(recovered_response_words) def get_rank_results( @@ -2176,7 +2171,7 @@ def __get_context_prob( mask = batch["mask"].to(self.device, dtype=torch.long) == 1 outputs = self.model(input_ids=ids, attention_mask=mask) - loss, logits = outputs.loss, outputs.logits + logits = outputs.logits probs = F.softmax(logits, dim=-1) for j in range(ids.shape[0]): @@ -2355,7 +2350,7 @@ def split_string_to_words(input_string): mask = batch["mask"].to(self.device, dtype=torch.long) == 1 outputs = self.model(input_ids=ids, attention_mask=mask) - loss, logits = outputs.loss, outputs.logits + logits = outputs.logits probs = F.softmax(logits, dim=-1) for j in range(ids.shape[0]): diff --git a/tests/test_llmlingua.py b/tests/test_llmlingua.py index 60f766e..4cd9f0a 100644 --- a/tests/test_llmlingua.py +++ b/tests/test_llmlingua.py @@ -63,7 +63,7 @@ def __init__(self, *args, **kwargs): import nltk nltk.download("punkt") - except: + except Exception: print("nltk_data exits.") self.llmlingua = PromptCompressor("lgaalves/gpt2-dolly", device_map="cpu") diff --git a/tests/test_longllmlingua.py b/tests/test_longllmlingua.py index 2763cc9..ff128f2 100644 --- a/tests/test_longllmlingua.py +++ b/tests/test_longllmlingua.py @@ -65,7 +65,7 @@ def __init__(self, *args, **kwargs): import nltk nltk.download("punkt") - except: + except Exception: print("nltk_data exits.") self.llmlingua = PromptCompressor("lgaalves/gpt2-dolly", device_map="cpu")