output

Jemoka · Jemoka · commit a95877e37810 · 2026-03-17T16:46:54.000-07:00
diff --git a/batchalign/pipelines/analysis/compare.py b/batchalign/pipelines/analysis/compare.py
@@ -317,20 +317,30 @@ class CompareAnalysisEngine(BatchalignEngine):
     tasks = [Task.COMPARE_ANALYSIS]
 
     def analyze(self, doc, **kwargs):
+        from collections import defaultdict
+
         matches = 0
         extra_main = 0
         extra_gold = 0
 
+        # Per-POS counters: pos -> {matches, insertions, deletions}
+        pos_counts = defaultdict(lambda: {"matches": 0, "insertions": 0, "deletions": 0})
+
         for utt in doc.content:
             if not isinstance(utt, Utterance) or utt.comparison is None:
                 continue
             for tok in utt.comparison:
+                if tok.pos == "PUNCT":
+                    continue
                 if tok.status == "match":
                     matches += 1
+                    pos_counts[tok.pos]["matches"] += 1
                 elif tok.status == "extra_main":
                     extra_main += 1
+                    pos_counts[tok.pos]["insertions"] += 1
                 elif tok.status == "extra_gold":
                     extra_gold += 1
+                    pos_counts[tok.pos]["deletions"] += 1
 
         total_gold = matches + extra_gold
         total_main = matches + extra_main
@@ -347,6 +357,15 @@ def analyze(self, doc, **kwargs):
             "total_main_words": total_main,
         }
 
+        # Add per-POS breakdown
+        for pos in sorted(pos_counts.keys()):
+            counts = pos_counts[pos]
+            total = counts["matches"] + counts["deletions"]
+            metrics[f"{pos}:matches"] = counts["matches"]
+            metrics[f"{pos}:insertions"] = counts["insertions"]
+            metrics[f"{pos}:deletions"] = counts["deletions"]
+            metrics[f"{pos}:total"] = total
+
         return {
             "doc": doc,
             "metrics": metrics,
diff --git a/batchalign/version b/batchalign/version
@@ -1,3 +1,3 @@
-0.8.2
+0.8.2-post.1
 Feb 27th 2026
 adds new feature regarding compare