microsoft · abdimk · Mar 23, 2026 · Mar 23, 2026
diff --git a/examples/CoT.ipynb b/examples/CoT.ipynb
@@ -449,31 +449,31 @@
     "    questions = []\n",
     "    ans_pred = []\n",
     "    ans_gold = []\n",
-    "    for l in lines:\n",
-    "        l = l.replace(\",\", \"\")\n",
-    "        if l.startswith(\"Q: \"):\n",
+    "    for line in lines:\n",
+    "        line = line.replace(\",\", \"\")\n",
+    "        if line.startswith(\"Q: \"):\n",
     "            if am is not None and a is not None:\n",
     "                questions.append(q)\n",
     "                ans_pred.append(am)\n",
     "                ans_gold.append(a)\n",
     "                if test_answer(am, a):\n",
     "                    acc += 1\n",
     "            current_mode = \"q\"\n",
-    "            q = l\n",
+    "            q = line\n",
     "            num_q += 1\n",
-    "        elif l.startswith(\"A_model:\"):\n",
+    "        elif line.startswith(\"A_model:\"):\n",
     "            current_mode = \"am\"\n",
-    "            am = l\n",
-    "        elif l.startswith(\"A:\"):\n",
+    "            am = line\n",
+    "        elif line.startswith(\"A:\"):\n",
     "            current_mode = \"a\"\n",
-    "            a = l\n",
+    "            a = line\n",
     "        else:\n",
     "            if current_mode == \"q\":\n",
-    "                q += l\n",
+    "                q += line\n",
     "            elif current_mode == \"am\":\n",
-    "                am += l\n",
+    "                am += line\n",
     "            elif current_mode == \"a\":\n",
-    "                a += l\n",
+    "                a += line\n",
     "            else:\n",
     "                raise ValueError(current_mode)\n",
     "\n",

diff --git a/examples/LLMLingua2.ipynb b/examples/LLMLingua2.ipynb
@@ -67,12 +67,12 @@
     "openai.api_key = \"<insert_openai_key>\"\n",
     "\n",
     "# or Using the AOAI\n",
-    "import openai\n",
+    "# import openai\n",
     "\n",
-    "openai.api_key = \"<insert_openai_key>\"\n",
-    "openai.api_base = \"<insert_openai_base>\"\n",
-    "openai.api_type = \"azure\"\n",
-    "openai.api_version = \"2023-05-15\""
+    "# openai.api_key = \"<insert_openai_key>\"\n",
+    "# openai.api_base = \"<insert_openai_base>\"\n",
+    "# openai.api_type = \"azure\"\n",
+    "# openai.api_version = \"2023-05-15\""
    ]
   },
   {

diff --git a/examples/Retrieval.ipynb b/examples/Retrieval.ipynb
@@ -199,7 +199,7 @@
     "            )\n",
     "\n",
     "            c = prompt.split(\"\\n\\n\")\n",
-    "            instruction, question = c[0], c[-1]\n",
+    "            question = c[-1]\n",
     "            demonstration = \"\\n\".join(c[1:-1])\n",
     "            corpus = demonstration.split(\"\\n\")\n",
     "\n",

diff --git a/experiments/llmlingua2/evaluation/eval_bbh.py b/experiments/llmlingua2/evaluation/eval_bbh.py
@@ -246,7 +246,6 @@ def predict():
         results[idx] = {"question": q, "model_answer": answer, "truth_answer": a}
         json.dump(results, open(args.save_path, "w"), indent=4)
 
-        ans_ = extract_ans(answer, task_type)
         if task_type == "multiple_choice":
             a = a[1]
         res = "%dTask:%s\n%s\nA_model:%s\nA_target:%s\n\n" % (

diff --git a/experiments/llmlingua2/evaluation/eval_gsm8k.py b/experiments/llmlingua2/evaluation/eval_gsm8k.py
@@ -56,37 +56,37 @@ def extract_ans(ans_model):
 def parse_pred_ans(filename):
     with open(filename) as fd:
         lines = fd.readlines()
-    am, a = None, None
+    am, a, q = None, None, None
     num_q, acc = 0, 0
     current_mode = "none"
     questions = []
     ans_pred = []
     ans_gold = []
-    for l in lines:
-        l = l.replace(",", "")
-        if l.startswith("Q: "):
-            if am is not None and a is not None:
+    for line in lines:
+        line = line.replace(",", "")
+        if line.startswith("Q: "):
+            if am is not None and a is not None and q is not None:
                 questions.append(q)
                 ans_pred.append(am)
                 ans_gold.append(a)
                 if test_answer(am, a):
                     acc += 1
             current_mode = "q"
-            q = l
+            q = line
             num_q += 1
-        elif l.startswith("A_model:"):
+        elif line.startswith("A_model:"):
             current_mode = "am"
-            am = l
-        elif l.startswith("A:"):
+            am = line
+        elif line.startswith("A:"):
             current_mode = "a"
-            a = l
+            a = line
         else:
             if current_mode == "q":
-                q += l
+                q += line
             elif current_mode == "am":
-                am += l
+                am += line
             elif current_mode == "a":
-                a += l
+                a += line
             else:
                 raise ValueError(current_mode)
 

diff --git a/experiments/llmlingua2/evaluation/eval_longbench.py b/experiments/llmlingua2/evaluation/eval_longbench.py
@@ -188,10 +188,9 @@ def eval(load_path):
             lengths[data["task"]].append(data["length"])
     scores = {}
     for task in predictions.keys():
-        pred_list, ans_list, length_list = (
+        pred_list, ans_list = (
             predictions[task],
             answers[task],
-            lengths[task],
         )
         score = scorer(task, pred_list, ans_list, all_classes[task])
         print(score)
@@ -272,7 +271,6 @@ def predict():
 
     for sample in tqdm(dataset):
         idx = int(sample["idx"])
-        task = sample["task"]
         if idx in results or str(idx) in results:
             print(f"{idx} processed")
             continue

diff --git a/experiments/llmlingua2/evaluation/metrics.py b/experiments/llmlingua2/evaluation/metrics.py
@@ -2,6 +2,7 @@
 # Licensed under The MIT License [see LICENSE for details]
 
 import re
+import regex
 import string
 from collections import Counter
 from typing import List
@@ -114,7 +115,7 @@ def rouge_score(prediction, ground_truth, **kwargs):
     rouge = Rouge()
     try:
         scores = rouge.get_scores([prediction], [ground_truth], avg=True)
-    except:
+    except Exception:
         return 0.0
     return scores["rouge-l"]["f"]
 
@@ -166,9 +167,6 @@ def qa_score(prediction, ground_truths):
     return 0.0
 
 
-import regex
-
-
 def normalize_answer2(s: str) -> str:
     """Normalization from the SQuAD evaluation script.
 

diff --git a/experiments/llmlingua2/model_training/train_roberta.py b/experiments/llmlingua2/model_training/train_roberta.py
@@ -60,7 +60,6 @@ def train(epoch):
     model.train()
 
     for idx, batch in enumerate(train_dataloader):
-        t = time.time()
         ids = batch["ids"].to(device, dtype=torch.long)
         mask = batch["mask"].to(device, dtype=torch.long)
         targets = batch["targets"].to(device, dtype=torch.long)
@@ -149,8 +148,7 @@ def test(model, eval_dataloader):
             )
             eval_accuracy += tmp_eval_accuracy
 
-    labels = [label.item() for label in eval_labels]
     predictions = [pred.item() for pred in eval_preds]
 
     eval_loss = eval_loss / nb_eval_steps
     eval_accuracy = eval_accuracy / nb_eval_steps

diff --git a/experiments/securitylingua/filter.py b/experiments/securitylingua/filter.py
@@ -3,7 +3,7 @@
 
 import argparse
 from collections import defaultdict
-from typing import Dict, List, Tuple, DefaultDict
+from typing import List, Tuple, DefaultDict
 import numpy as np
 import torch
 

diff --git a/experiments/securitylingua/label_word.py b/experiments/securitylingua/label_word.py
@@ -12,7 +12,6 @@
 import torch
 from tqdm import tqdm
 from multiprocessing import Pool
-import multiprocessing
 
 def setup_logging(save_path: str) -> logging.Logger:
     """Setup logging configuration"""

diff --git a/experiments/securitylingua/train_roberta.py b/experiments/securitylingua/train_roberta.py
@@ -4,14 +4,11 @@
 import argparse
 import os
 import random
-import time
-from typing import List, Tuple, Dict
+from typing import List, Tuple
 
 import torch
 from sklearn.metrics import accuracy_score
-from torch import cuda
 from torch.utils.data import DataLoader
-from torch.utils.tensorboard import SummaryWriter
 from tqdm import tqdm
 from transformers import AutoModelForTokenClassification, AutoTokenizer
 from utils import TokenClfDataset

diff --git a/llmlingua/prompt_compressor.py b/llmlingua/prompt_compressor.py
@@ -5,7 +5,6 @@
 import copy
 import json
 import re
-import string
 from collections import defaultdict
 from typing import List, Union
 
@@ -796,7 +795,7 @@ def compress_prompt_llmlingua2(
                 chunk_end_tokens.append(token_map[c])
         chunk_end_tokens = set(chunk_end_tokens)
 
-        if type(context) == str:
+        if isinstance(context, str):
             context = [context]
         context = copy.deepcopy(context)
 
@@ -1221,10 +1220,10 @@ def control_context_budget(
         if reorder_context == "original":
             used = sorted(used)
         elif reorder_context == "two_stage":
-            l, r = [_ for idx, _ in enumerate(used) if idx % 2 == 0], [
+            left, right = [_ for idx, _ in enumerate(used) if idx % 2 == 0], [
                 _ for idx, _ in enumerate(used) if idx % 2 == 1
             ]
-            used = l + r[::-1]
+            used = left + right[::-1]
 
         if dynamic_context_compression_ratio > 0:
             N = len(used)
@@ -1764,8 +1763,8 @@ def match_from_compressed(response_word):
                     response_c[original_input_ids[idx]].append(idx)
             res, res_min, res_c = None, float("inf"), 1
             n = len(response_input_ids)
-            for l in response_c[response_input_ids[0]]:
-                x, y, c = 0, l, 1
+            for start_idx in response_c[response_input_ids[0]]:
+                x, y, c = 0, start_idx, 1
                 for x in range(1, n):
                     idx = bisect.bisect_right(response_c[response_input_ids[x]], y)
                     if (
@@ -1777,18 +1776,14 @@ def match_from_compressed(response_word):
                     y = response_c[response_input_ids[x]][idx]
                 if c > res_c:
                     res_c = c
-                    res_min = y - l + 1
-                    res = (l, y + 1)
-                elif c == res_c and y - l + 1 < res_min:
-                    res_min = y - l + 1
-                    res = (l, y + 1)
+                    res_min = y - start_idx + 1
+                    res = (start_idx, y + 1)
+                elif c == res_c and y - start_idx + 1 < res_min:
+                    res_min = y - start_idx + 1
+                    res = (start_idx, y + 1)
 
             if res is None:
                 return response_word
-            # while l > 0 and not self.tokenizer.convert_ids_to_tokens(original_input_ids[l]).startswith("_"):
-            #     l -= 1
-            # while r < M - 1 and not self.tokenizer.convert_ids_to_tokens(original_input_ids[l]).startswith("_"):
-            #     l -= 1
             return self.tokenizer.decode(original_input_ids[res[0] : res[1]])
 
         response_words = response.split(" ")
@@ -1798,21 +1793,24 @@ def match_from_compressed(response_word):
         ]
         N, M = len(response_words), len(original_input_ids)
         recovered_response_words = []
-        l = 0
-        while l < N:
-            if response_words[l] not in compressed_prompt:
-                recovered_response_words.append(response_words[l])
-                l += 1
+        left = 0
+        while left < N:
+            if response_words[left] not in compressed_prompt:
+                recovered_response_words.append(response_words[left])
+                left += 1
                 continue
-            r = l
+            right = left
             while (
-                r + 1 < N and " ".join(response_words[l : r + 2]) in compressed_prompt
+                right + 1 < N
+                and " ".join(response_words[left : right + 2]) in compressed_prompt
             ):
-                r += 1
+                right += 1
 
-            match_words = match_from_compressed(" ".join(response_words[l : r + 1]))
+            match_words = match_from_compressed(
+                " ".join(response_words[left : right + 1])
+            )
             recovered_response_words.append(match_words)
-            l = r + 1
+            left = right + 1
         return " ".join(recovered_response_words)
 
     def get_rank_results(
@@ -2176,7 +2174,7 @@ def __get_context_prob(
                 mask = batch["mask"].to(self.device, dtype=torch.long) == 1
 
                 outputs = self.model(input_ids=ids, attention_mask=mask)
-                loss, logits = outputs.loss, outputs.logits
+                logits = outputs.logits
                 probs = F.softmax(logits, dim=-1)
 
                 for j in range(ids.shape[0]):
@@ -2355,7 +2353,7 @@ def split_string_to_words(input_string):
                 mask = batch["mask"].to(self.device, dtype=torch.long) == 1
 
                 outputs = self.model(input_ids=ids, attention_mask=mask)
-                loss, logits = outputs.loss, outputs.logits
+                logits = outputs.logits
                 probs = F.softmax(logits, dim=-1)
 
                 for j in range(ids.shape[0]):

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,3 +1,8 @@
+[project]
+name = "llmlingua"
+version = "0.2.2"
+dynamic = ["description", "readme", "requires-python", "license", "authors", "keywords", "classifiers", "dependencies", "optional-dependencies"]
+
 [tool.black]
 line-length = 88
 target-version = ['py38']
@@ -8,4 +13,4 @@ atomic = true
 profile = "black"
 line_length = 88
 skip_gitignore = true
-known_first_party = ["llmlingua"]
+known_first_party = ["llmlingua"]
diff --git a/tests/test_llmlingua.py b/tests/test_llmlingua.py
@@ -63,7 +63,7 @@ def __init__(self, *args, **kwargs):
             import nltk
 
             nltk.download("punkt")
-        except:
+        except Exception:
             print("nltk_data exits.")
         self.llmlingua = PromptCompressor("lgaalves/gpt2-dolly", device_map="cpu")
 

diff --git a/tests/test_longllmlingua.py b/tests/test_longllmlingua.py
@@ -65,7 +65,7 @@ def __init__(self, *args, **kwargs):
             import nltk
 
             nltk.download("punkt")
-        except:
+        except Exception:
             print("nltk_data exits.")
         self.llmlingua = PromptCompressor("lgaalves/gpt2-dolly", device_map="cpu")