diff --git a/addon/appModules/_chatCache.py b/addon/appModules/_chatCache.py index 4bfee75..5328f9b 100644 --- a/addon/appModules/_chatCache.py +++ b/addon/appModules/_chatCache.py @@ -26,6 +26,10 @@ _tempPath = None _chatRoomName = None _lastMatchedIdx = None +# Reply context for the most recent successful lookup, or None when the +# matched bubble was not a reply. Populated by lookupMessage(); consumed +# by the left-arrow handler so users can hear the quoted original. +_lastReplyInfo = None # _messageIndexMap[i] = 1-based message number for position i (0 for date rows) _messageIndexMap = [] @@ -53,13 +57,14 @@ def setCache(messages, tempPath, chatRoomName): cache always reflects a single chat room. Builds _messageIndexMap and _messageDateGroups using the same counting rules as MessageReaderDialog. """ - global _messages, _tempPath, _chatRoomName, _lastMatchedIdx + global _messages, _tempPath, _chatRoomName, _lastMatchedIdx, _lastReplyInfo global _messageIndexMap, _messageDateGroups clearCache() _messages = list(messages or []) _tempPath = tempPath _chatRoomName = chatRoomName _lastMatchedIdx = None + _lastReplyInfo = None msgIdx = 0 lastDateIdx = -1 @@ -81,13 +86,14 @@ def setCache(messages, tempPath, chatRoomName): def clearCache(): """Reset cache state and remove the temp export file if present.""" - global _messages, _tempPath, _chatRoomName, _lastMatchedIdx + global _messages, _tempPath, _chatRoomName, _lastMatchedIdx, _lastReplyInfo global _messageIndexMap, _messageDateGroups path = _tempPath _messages = [] _tempPath = None _chatRoomName = None _lastMatchedIdx = None + _lastReplyInfo = None _messageIndexMap = [] _messageDateGroups = [] if path: @@ -252,6 +258,115 @@ def _formatMessage(msg): return f"{name} {content} {timeStr}".strip() +def _detectReplyNames(ocrText): + """Identify both names when the OCR shows a reply preview. + + Reply bubbles show the actual sender's name first (above the bubble), + then a quote block containing the quoted user's name and message + preview, then the actual reply content. OCR captures both names, and + the longer quoted preview otherwise wins on content overlap alone. + + Returns (replySender, quotedSender) when 2+ distinct cached sender + names appear in the OCR text (reply pattern). Names are ordered by + their position in the OCR text — the earliest is the actual sender, + the next is the quoted user being replied to. Returns (None, None) + when the OCR doesn't look like a reply. + """ + if not ocrText: + return None, None + occurrences = [] + seen = set() + for msg in _messages: + if msg.get("type") != "message": + continue + name = msg.get("name", "") + if not name or name in seen: + continue + # Require the name at the start of a line, optionally preceded by + # non-letter glyphs (e.g. the "0 " quote-indicator icon LINE + # renders before quoted-user names in reply bubbles), and not + # immediately followed by another CJK/Latin letter. This: + # - lets quoted names like "0 王昱涵" still match (real LINE OCR); + # - blocks names mentioned mid-sentence (e.g. "感謝Bob你的幫助" + # — 感謝 is CJK so the non-letter prefix can't consume it); + # - blocks short names matching inside longer names (e.g. + # "王昱" must not match a line "王昱涵" — the lookahead fails + # because 涵 is a CJK letter). + m = re.search( + r"(?m)^[^一-鿿぀-ヿa-zA-Z]*" + re.escape(name) + r"(?![一-鿿぀-ヿa-zA-Z])", + ocrText, + ) + if m: + occurrences.append((m.start(), name)) + seen.add(name) + if len(occurrences) < 2: + return None, None + occurrences.sort(key=lambda t: t[0]) + return occurrences[0][1], occurrences[1][1] + + +def _findQuotedOriginal(replyIdx, quotedSender, ocrText): + """Locate the original message a reply is quoting. + + Replies always come AFTER the original in chat order, so we scan + upward from ``replyIdx`` for messages by ``quotedSender`` and pick + the one whose content has the largest overlap with the OCR text + (which contains a fragment of the quoted preview). The OCR preview + is shorter than the full message — substring containment in either + direction or longest-common-substring (≥ ``_MIN_FUZZY_OVERLAP``) + count as a match. + + Returns ``(msg, idx)`` on success, ``(None, None)`` otherwise. + """ + if not quotedSender or replyIdx <= 0: + return None, None + ocrNorm = _normalize(ocrText) + if not ocrNorm: + return None, None + bestOverlap = 0 + bestIdx = -1 + for i in range(replyIdx - 1, -1, -1): + msg = _messages[i] + if msg.get("type") != "message": + continue + if msg.get("name") != quotedSender: + continue + contentNorm = _normalize(msg.get("content", "")) + if not contentNorm: + continue + if contentNorm in ocrNorm: + overlap = len(contentNorm) + elif ocrNorm in contentNorm: + overlap = len(ocrNorm) + else: + overlap = _longestCommonSubstring(contentNorm, ocrNorm) + if overlap >= _MIN_FUZZY_OVERLAP and overlap > bestOverlap: + bestOverlap = overlap + bestIdx = i + if bestIdx < 0: + return None, None + return _messages[bestIdx], bestIdx + + +def getLastReplyInfo(): + """Return reply context for the most recent successful lookup. + + Returns a dict with ``replySender``, ``replyContent``, ``replyTime``, + ``originalName``, ``originalContent``, ``originalTime`` and + ``originalIdx`` when the matched bubble was a reply, otherwise + ``None``. ``originalContent``/``originalTime``/``originalIdx`` may + be ``None`` when the original message couldn't be located in the + cache (e.g. it was outside the exported window). + """ + return _lastReplyInfo + + +def clearLastReplyInfo(): + """Clear cached reply context so the left-arrow handler fires only once.""" + global _lastReplyInfo + _lastReplyInfo = None + + def lookupMessage(ocrText): """Return the cached message that best matches the OCR snippet. @@ -268,7 +383,10 @@ def lookupMessage(ocrText): Returns: (formattedText, index) on match, otherwise (None, None). """ - global _lastMatchedIdx + global _lastMatchedIdx, _lastReplyInfo + # Default to no reply context; populated below when the matched bubble + # turns out to be a reply. + _lastReplyInfo = None if not isActive() or not ocrText: return None, None @@ -296,6 +414,13 @@ def lookupMessage(ocrText): # Date group of the cursor position (−1 if no date seen yet) cursorDateGroup = _messageDateGroups[cursor] if cursor < len(_messageDateGroups) else -1 + # Reply pattern: when the bubble shows a quote preview, OCR contains both + # the actual sender's name and the quoted user's name. The quoted preview + # usually has a longer content overlap than the actual reply, so without + # this restriction the cache would return the quoted message instead of + # the reply that the user just navigated to. + replySender, quotedSender = _detectReplyNames(ocrText) + bestScore = 0.0 bestIdx = -1 @@ -307,6 +432,8 @@ def lookupMessage(ocrText): msgTime = msg.get("time", "") if not msgContentNorm: continue + if replySender and msgName != replySender: + continue # Try exact substring containment first — that's the strongest signal. contentOverlap = 0 @@ -353,4 +480,21 @@ def lookupMessage(ocrText): f" msgIdx={_messageIndexMap[bestIdx]}" f" dateGroup=[{_messageDateGroups[bestIdx]}]: {_formatMessage(_messages[bestIdx])!r}", ) + + # Reply context: when the OCR pattern indicated a reply, locate the + # original (quoted) message upward in the cache so the left-arrow + # handler can read it on demand. + if replySender and quotedSender: + matched = _messages[bestIdx] + originalMsg, originalIdx = _findQuotedOriginal(bestIdx, quotedSender, ocrText) + _lastReplyInfo = { + "replySender": replySender, + "replyContent": matched.get("content", ""), + "replyTime": matched.get("time", ""), + "originalName": quotedSender, + "originalContent": originalMsg.get("content", "") if originalMsg else None, + "originalTime": originalMsg.get("time", "") if originalMsg else None, + "originalIdx": originalIdx if originalMsg else None, + } + return _formatMessage(_messages[bestIdx]), bestIdx diff --git a/addon/appModules/line.py b/addon/appModules/line.py index e980c32..976d9b1 100644 --- a/addon/appModules/line.py +++ b/addon/appModules/line.py @@ -564,10 +564,10 @@ def _getVoiceCallConfirmationState(text): if action != "join": compact = _normalizeVoiceCallConfirmationLine(text) lower = compact.lower() - hasJoinHint = any( - keyword in compact - for keyword in ("加入", "參加", "参加", "已加入", "已參加", "已参加") - ) or "join" in lower + hasJoinHint = ( + any(keyword in compact for keyword in ("加入", "參加", "参加", "已加入", "已參加", "已参加")) + or "join" in lower + ) if hasJoinHint and isGroup: action = "join" @@ -5930,12 +5930,30 @@ def _restoreClipboard(original): if cacheActive: cachedText, _cacheIdx = _chatCache.lookupMessage(initialOcrText) if cachedText: + replyInfo = _chatCache.getLastReplyInfo() + if replyInfo: + # Announce as "Sender 回覆 OriginalSender Content Time" + # in one utterance so the reply context and the time + # can't be split across two announcements (the + # follow-up timestamp element would otherwise repeat + # the message with the time on its own). + parts = [ + replyInfo["replySender"], + "回覆", + replyInfo["originalName"], + replyInfo["replyContent"], + ] + if replyInfo.get("replyTime"): + parts.append(replyInfo["replyTime"]) + announceText = " ".join(parts) + else: + announceText = cachedText log.info( - f"LINE: copy-read served from chat cache: {cachedText!r}", + f"LINE: copy-read served from chat cache: {announceText!r}", ) _restoreClipboard(origClip) speech.cancelSpeech() - ui.message(cachedText) + ui.message(announceText) return else: log.debug( @@ -9403,10 +9421,36 @@ def script_navigateAndTrack(self, gesture): navigating with Tab/arrows. This script sends the key through, waits briefly for LINE to process it, then queries the UIA focused element directly and announces it. + + Special case: if the most recently announced cached message was + a reply, pressing left arrow speaks the original (quoted) + message instead of navigating, so users can hear what the reply + is replying to. """ if _suppressAddon: gesture.send() return + try: + keyName = gesture.mainKeyName + except Exception: + keyName = None + if keyName == "leftArrow": + try: + from . import _chatCache + + replyInfo = _chatCache.getLastReplyInfo() + except Exception: + log.debug( + "LINE: chat cache reply lookup failed", + exc_info=True, + ) + replyInfo = None + if replyInfo and replyInfo.get("originalContent"): + ui.message( + f"{replyInfo['originalName']} {replyInfo['originalContent']}", + ) + _chatCache.clearLastReplyInfo() + return global _lastOCRElement, _chatListMode # Exiting chat list mode on Tab/Shift+Tab navigation _chatListMode = False diff --git a/tests/test_chat_cache.py b/tests/test_chat_cache.py index 467844c..caf4fc2 100644 --- a/tests/test_chat_cache.py +++ b/tests/test_chat_cache.py @@ -248,6 +248,238 @@ def test_short_content_requires_time_match_to_avoid_false_positives(): assert "有" in formatted2 +def test_reply_bubble_matches_actual_reply_not_quoted_preview(): + """When a bubble shows a reply preview (sender + quoted message), + OCR captures both names plus the quoted text. The lookup must match + the actual reply content, not the longer quoted preview.""" + _reset_cache( + [ + { + "type": "message", + "name": "王昱涵", + "content": "然後認領想要的工作(可揚你可以休息\n1. 科系有兩個表格爆掉了要改一下\n2. 格子要合併儲存格", + "time": "22:20", + }, + {"type": "message", "name": "陳禹安", "content": "那我用標題", "time": "10:26"}, + {"type": "message", "name": "莊忠諺", "content": "我修結論", "time": "10:27"}, + ], + ) + + # 莊忠諺 replies to 王昱涵's earlier message. OCR captures both names + # plus a chunk of the quoted preview, then the actual reply "我修結論". + # Without the reply-sender filter, the long quoted overlap would win. + formatted, idx = chat_cache.lookupMessage( + "莊忠諺\n0 王昱涵\n然後認領想要的工作\n( 可揚你可以休息 \n我修結論", + ) + assert idx == 2 + assert "我修結論" in formatted + + # Same shape for the 陳禹安 reply. + formatted2, idx2 = chat_cache.lookupMessage( + "陳禹安\n0 王昱涵\n然後認領想要的工作\n( 可揚你可以休息 \n那我用標題\n上午 10:26", + ) + assert idx2 == 1 + assert "那我用標題" in formatted2 + + +def test_reply_lookup_exposes_original_message_for_left_arrow(): + """After matching a reply, getLastReplyInfo() returns the quoted + original located upward in the cache, including its content for + the left-arrow read-aloud handler.""" + _reset_cache( + [ + { + "type": "message", + "name": "王昱涵", + "content": "然後認領想要的工作(可揚你可以休息\n1. 科系有兩個表格爆掉了", + "time": "22:20", + }, + {"type": "message", "name": "陳禹安", "content": "那我用標題", "time": "10:26"}, + {"type": "message", "name": "莊忠諺", "content": "我修結論", "time": "10:27"}, + ], + ) + + formatted, idx = chat_cache.lookupMessage( + "莊忠諺\n0 王昱涵\n然後認領想要的工作\n( 可揚你可以休息 \n我修結論", + ) + assert idx == 2 + assert "我修結論" in formatted + + info = chat_cache.getLastReplyInfo() + assert info is not None + assert info["replySender"] == "莊忠諺" + assert info["replyContent"] == "我修結論" + assert info["originalName"] == "王昱涵" + assert info["originalIdx"] == 0 + assert "然後認領想要的工作" in info["originalContent"] + + +def test_reply_lookup_clears_reply_info_for_non_reply_message(): + """Non-reply OCR (single name) must clear stale reply info so the + left-arrow handler doesn't read an old original.""" + _reset_cache( + [ + {"type": "message", "name": "Alice", "content": "舊訊息", "time": "09:00"}, + {"type": "message", "name": "Bob", "content": "回覆內容", "time": "09:05"}, + {"type": "message", "name": "Bob", "content": "後續訊息", "time": "09:10"}, + ], + ) + + # First lookup: a reply (2 names) — populates reply info. + chat_cache.lookupMessage("Bob\n0 Alice\n舊訊息\n回覆內容\n上午 9 : 05") + assert chat_cache.getLastReplyInfo() is not None + + # Next lookup: regular non-reply message — reply info must clear. + chat_cache.lookupMessage("Bob 後續訊息 上午 9 : 10") + assert chat_cache.getLastReplyInfo() is None + + +def test_reply_lookup_clears_reply_info_when_no_match(): + """When the cache can't match anything, stale reply info must clear.""" + _reset_cache( + [ + {"type": "message", "name": "Alice", "content": "舊訊息", "time": "09:00"}, + {"type": "message", "name": "Bob", "content": "回覆", "time": "09:05"}, + ], + ) + + chat_cache.lookupMessage("Bob\n0 Alice\n舊訊息\n回覆\n上午 9 : 05") + assert chat_cache.getLastReplyInfo() is not None + + # Unrelated OCR — no match, reply info must clear. + formatted, idx = chat_cache.lookupMessage("完全不同的內容沒有時間") + assert formatted is None + assert chat_cache.getLastReplyInfo() is None + + +def test_find_quoted_original_only_searches_upward(): + """The original message is always BEFORE the reply in chat order; + never match a later message even if it has the same content.""" + _reset_cache( + [ + {"type": "message", "name": "Alice", "content": "說了某句話", "time": "09:00"}, + {"type": "message", "name": "Bob", "content": "回應", "time": "09:05"}, + {"type": "message", "name": "Alice", "content": "說了某句話", "time": "10:00"}, + ], + ) + + # Bob (idx=1) replies to Alice's earlier message (idx=0). Even though + # the later Alice message (idx=2) has identical content, the search + # upward must pick idx=0. + chat_cache.lookupMessage("Bob\n0 Alice\n說了某句話\n回應\n上午 9 : 05") + info = chat_cache.getLastReplyInfo() + assert info is not None + assert info["originalIdx"] == 0 + + +def test_reply_filter_does_not_engage_when_only_one_name_in_ocr(): + """Single-name OCR is not a reply preview — the filter must not engage, + otherwise messages from anyone else become unmatchable.""" + _reset_cache( + [ + {"type": "message", "name": "Alice", "content": "早安", "time": "09:00"}, + {"type": "message", "name": "Bob", "content": "回覆內容", "time": "09:05"}, + ], + ) + + # Only Bob appears in OCR; reply filter must not exclude Bob's own + # message. (If it did engage incorrectly using "Alice", Bob's message + # would be filtered out and we'd return None.) + formatted, idx = chat_cache.lookupMessage("Bob\n回覆內容\n上午 9 : 05") + assert idx == 1 + assert "回覆內容" in formatted + + +def test_reply_filter_not_triggered_by_name_in_message_body(): + """A name mentioned inside message text must not trigger reply detection. + + If Alice sends '感謝Bob你的幫助' and both Alice and Bob are in the cache, + the old find() approach would detect two names and wrongly enter reply + mode. The line-anchored regex must not match 'Bob' mid-sentence. + """ + _reset_cache( + [ + {"type": "message", "name": "Bob", "content": "沒問題", "time": "09:00"}, + { + "type": "message", + "name": "Alice", + "content": "感謝Bob你的幫助", + "time": "09:05", + }, + ], + ) + + # OCR for Alice's message — 'Bob' is inside the content line, not a + # standalone line. lookupMessage must match Alice's message normally + # (idx=1) without entering reply mode. + formatted, idx = chat_cache.lookupMessage("Alice\n感謝Bob你的幫助\n上午 9 : 05") + assert idx == 1 + assert chat_cache.getLastReplyInfo() is None + + +def test_reply_detection_handles_quote_indicator_prefix(): + """Real LINE OCR renders a quote-indicator glyph (often "0 ") before + the quoted user name in reply bubbles, so the quoted name does NOT + occupy a standalone line. The regex must still detect it. + + Regression for an over-strict ``^name$`` regex that broke the + original reply-bubble fix on actual LINE OCR. + """ + _reset_cache( + [ + { + "type": "message", + "name": "王昱涵", + "content": "然後認領想要的工作(可揚你可以休息", + "time": "22:20", + }, + {"type": "message", "name": "莊忠諺", "content": "我修結論", "time": "10:23"}, + ], + ) + + # Exact OCR shape from the LINE log: actual sender on its own line, + # quoted user preceded by the "0 " quote-indicator glyph. + formatted, idx = chat_cache.lookupMessage( + "莊忠諺\n0 王昱涵\n然後認領想要的工作\n( 可揚你可以休息 \n我修結論", + ) + assert idx == 1 + assert "我修結論" in formatted + info = chat_cache.getLastReplyInfo() + assert info is not None + assert info["replySender"] == "莊忠諺" + assert info["originalName"] == "王昱涵" + + +def test_reply_filter_not_triggered_by_substring_name(): + """A short name that is a substring of a longer name must not match. + + If '王昱' and '王昱涵' are both in the cache and the OCR line is + '王昱涵', the regex must not match '王昱' against that line. + """ + _reset_cache( + [ + { + "type": "message", + "name": "王昱", + "content": "你好", + "time": "09:00", + }, + { + "type": "message", + "name": "王昱涵", + "content": "收到", + "time": "09:05", + }, + ], + ) + + # OCR for 王昱涵's message — only '王昱涵' occupies a standalone line. + # '王昱' must NOT match, so only one name is found → no reply filter. + formatted, idx = chat_cache.lookupMessage("王昱涵\n收到\n上午 9 : 05") + assert idx == 1 + assert chat_cache.getLastReplyInfo() is None + + def test_lookup_returns_none_when_ocr_text_unrelated(): _reset_cache( [