@@ -317,20 +317,30 @@ class CompareAnalysisEngine(BatchalignEngine):
317317 tasks = [Task .COMPARE_ANALYSIS ]
318318
319319 def analyze (self , doc , ** kwargs ):
320+ from collections import defaultdict
321+
320322 matches = 0
321323 extra_main = 0
322324 extra_gold = 0
323325
326+ # Per-POS counters: pos -> {matches, insertions, deletions}
327+ pos_counts = defaultdict (lambda : {"matches" : 0 , "insertions" : 0 , "deletions" : 0 })
328+
324329 for utt in doc .content :
325330 if not isinstance (utt , Utterance ) or utt .comparison is None :
326331 continue
327332 for tok in utt .comparison :
333+ if tok .pos == "PUNCT" :
334+ continue
328335 if tok .status == "match" :
329336 matches += 1
337+ pos_counts [tok .pos ]["matches" ] += 1
330338 elif tok .status == "extra_main" :
331339 extra_main += 1
340+ pos_counts [tok .pos ]["insertions" ] += 1
332341 elif tok .status == "extra_gold" :
333342 extra_gold += 1
343+ pos_counts [tok .pos ]["deletions" ] += 1
334344
335345 total_gold = matches + extra_gold
336346 total_main = matches + extra_main
@@ -347,6 +357,15 @@ def analyze(self, doc, **kwargs):
347357 "total_main_words" : total_main ,
348358 }
349359
360+ # Add per-POS breakdown
361+ for pos in sorted (pos_counts .keys ()):
362+ counts = pos_counts [pos ]
363+ total = counts ["matches" ] + counts ["deletions" ]
364+ metrics [f"{ pos } :matches" ] = counts ["matches" ]
365+ metrics [f"{ pos } :insertions" ] = counts ["insertions" ]
366+ metrics [f"{ pos } :deletions" ] = counts ["deletions" ]
367+ metrics [f"{ pos } :total" ] = total
368+
350369 return {
351370 "doc" : doc ,
352371 "metrics" : metrics ,
0 commit comments