diff --git a/confluence-mdx/bin/reverse_sync/patch_builder.py b/confluence-mdx/bin/reverse_sync/patch_builder.py index 91dc3a35b..f0d446a67 100644 --- a/confluence-mdx/bin/reverse_sync/patch_builder.py +++ b/confluence-mdx/bin/reverse_sync/patch_builder.py @@ -397,6 +397,148 @@ def _classify_table_fragment_skip( return None +def _extract_mdx_list_entries(content: str) -> List[Dict[str, Any]]: + """MDX 리스트 블록을 path 기반 항목 목록으로 파싱한다.""" + item_re = re.compile(r'^(\s*)(?:\d+\.|\*|-|\+)(?:\s(.*)|$)') + entries: List[Dict[str, Any]] = [] + stack: List[Tuple[int, Tuple[int, ...]]] = [] + current: Optional[Dict[str, Any]] = None + + for raw_line in content.split('\n'): + m = item_re.match(raw_line) + if not m: + if current is not None: + current['continuation_lines'].append(raw_line) + continue + + indent = len(m.group(1)) + marker_text = (m.group(2) or '').strip() + + while stack and indent < stack[-1][0]: + stack.pop() + + if stack and indent == stack[-1][0]: + parent_path = stack[-2][1] if len(stack) >= 2 else () + index = stack[-1][1][-1] + 1 + stack.pop() + elif stack and indent > stack[-1][0]: + parent_path = stack[-1][1] + index = 0 + else: + parent_path = () + index = 0 + + path = parent_path + (index,) + current = { + 'path': path, + 'indent': indent, + 'marker_text': marker_text, + 'continuation_lines': [], + } + entries.append(current) + stack.append((indent, path)) + + return entries + + +def _normalize_list_continuation(lines: List[str]) -> str: + """continuation line 비교용 정규화 문자열.""" + return '\n'.join(line.strip() for line in lines if line.strip()) + + +def _find_removed_blank_item_paths( + old_content: str, + new_content: str, +) -> List[Tuple[int, ...]]: + """이전 형제로 병합된 것으로 보이는 빈 리스트 항목 path를 찾는다.""" + old_entries = _extract_mdx_list_entries(old_content) + new_entries = _extract_mdx_list_entries(new_content) + new_by_path = {entry['path']: entry for entry in new_entries} + removed_paths: List[Tuple[int, ...]] = [] + + for old_entry in old_entries: + path = old_entry['path'] + if old_entry['marker_text'] or path[-1] == 0: + continue + + old_payload = _normalize_list_continuation(old_entry['continuation_lines']) + if not old_payload: + continue + + new_same_path = new_by_path.get(path) + if new_same_path is not None and not new_same_path['marker_text']: + continue + + prev_path = path[:-1] + (path[-1] - 1,) + new_prev = new_by_path.get(prev_path) + if new_prev is None: + continue + + new_prev_payload = _normalize_list_continuation(new_prev['continuation_lines']) + if old_payload not in new_prev_payload: + continue + + removed_paths.append(path) + + return removed_paths + + +def _build_list_item_merge_patch( + mapping: BlockMapping, + old_content: str, + new_content: str, + old_plain: str, + new_plain: str, +) -> Optional[Dict[str, Any]]: + """preserved anchor 리스트에서 아이템이 제거된 경우 XHTML DOM을 조작하여 + replace_fragment 패치를 생성한다. + + 제거된 아이템의 자식 요소( 등)를 이전 아이템으로 이동하고 + 빈
  • 를 제거한다. 텍스트 변경은 _apply_text_changes로 처리한다. + """ + from bs4 import BeautifulSoup + from reverse_sync.reconstructors import _find_list_item_by_path + from reverse_sync.xhtml_patcher import _apply_text_changes + + removed_paths = _find_removed_blank_item_paths(old_content, new_content) + if not removed_paths: + return None + + soup = BeautifulSoup(mapping.xhtml_text, 'html.parser') + root = soup.find(['ol', 'ul']) + if root is None: + return None + + applied = False + for path in sorted(removed_paths, reverse=True): + removed_li = _find_list_item_by_path(root, list(path)) + prev_li = _find_list_item_by_path( + root, list(path[:-1] + (path[-1] - 1,))) + if removed_li is None or prev_li is None: + continue + + for child in list(removed_li.children): + if child.name == 'p' and child.get_text(strip=True) == '': + child.decompose() + continue + prev_li.append(child.extract()) + removed_li.decompose() + applied = True + + if not applied: + return None + + # 텍스트 변경 적용 + if root and old_plain != new_plain: + _apply_text_changes(root, old_plain, new_plain) + + return { + 'action': 'replace_fragment', + 'xhtml_xpath': mapping.xhtml_xpath, + 'new_element_xhtml': str(soup), + } + + def _emit_replacement_fragment(block: MdxBlock) -> str: """Block content를 현재 forward emitter 기준 fragment로 변환한다.""" parsed_blocks = [parsed for parsed in parse_mdx(block.content) if parsed.type != "empty"] @@ -958,6 +1100,21 @@ def _mark_used(block_id: str, m: BlockMapping): ) ) continue + # preserved anchor list + 아이템 수 변경: DOM 직접 조작으로
  • 병합/제거 + if (mapping is not None + and _contains_preserved_anchor_markup(mapping.xhtml_text) + and has_content_change): + merge_patch = _build_list_item_merge_patch( + mapping, + change.old_block.content, + change.new_block.content, + _old_plain, + _new_plain, + ) + if merge_patch is not None: + _mark_used(mapping.block_id, mapping) + patches.append(merge_patch) + continue # preserved anchor list: text-level 패치로 ac:/ri: XHTML 구조 보존 # (_apply_mdx_diff_to_xhtml 경로) # 같은 부모의 다중 변경은 순차 집계한다 (이전 결과에 누적 적용) diff --git a/confluence-mdx/bin/reverse_sync/xhtml_patcher.py b/confluence-mdx/bin/reverse_sync/xhtml_patcher.py index e7663d740..0ad42869a 100644 --- a/confluence-mdx/bin/reverse_sync/xhtml_patcher.py +++ b/confluence-mdx/bin/reverse_sync/xhtml_patcher.py @@ -343,6 +343,44 @@ def _append_text_to_tag(tag: Tag, text: str): tag.append(NavigableString(text)) +def _wrap_text_in_strong(p_tag: Tag, text: str) -> bool: + """preserved markup 바깥의 text node 일부를 으로 감싼다.""" + if not text: + return False + for node in list(p_tag.descendants): + if not isinstance(node, NavigableString): + continue + if _has_preserved_markup_ancestor(node, p_tag): + continue + node_text = str(node) + idx = node_text.find(text) + if idx == -1: + continue + + before = node_text[:idx] + matched = node_text[idx:idx + len(text)] + after = node_text[idx + len(text):] + + fragment = BeautifulSoup('', 'html.parser') + replacements: list = [] + if before: + replacements.append(NavigableString(before)) + strong = fragment.new_tag('strong') + strong.append(NavigableString(matched)) + replacements.append(strong) + if after: + replacements.append(NavigableString(after)) + + first = replacements[0] + node.replace_with(first) + prev = first + for repl in replacements[1:]: + prev.insert_after(repl) + prev = repl + return True + return False + + def _apply_strong_boundary_fixup(p_tag: Tag, new_inner_xhtml: str): """/ 보존 시 요소만 직접 수정하여 bold 경계를 교정한다. @@ -358,6 +396,16 @@ def _apply_strong_boundary_fixup(p_tag: Tag, new_inner_xhtml: str): old_strongs = p_tag.find_all('strong') new_strongs = new_soup.find_all('strong') + if len(old_strongs) < len(new_strongs): + remaining_old = [s.get_text() for s in old_strongs] + for new_s in new_strongs: + new_text = new_s.get_text() + if new_text in remaining_old: + remaining_old.remove(new_text) + continue + _wrap_text_in_strong(p_tag, new_text) + return + if len(old_strongs) > len(new_strongs): # 새 버전의 bold 텍스트 집합 구축 (매칭용) new_strong_texts = [s.get_text() for s in new_strongs] diff --git a/confluence-mdx/tests/reverse-sync/798064641/improved.mdx b/confluence-mdx/tests/reverse-sync/798064641/improved.mdx index 7135a6fd5..e963a3020 100644 --- a/confluence-mdx/tests/reverse-sync/798064641/improved.mdx +++ b/confluence-mdx/tests/reverse-sync/798064641/improved.mdx @@ -42,7 +42,6 @@ Email 발송을 위해서는 SMTP 서버가 필요하며, QueryPie에서는 SMTP 2. Password: 인증 계정의 암호를 입력합니다. 10. **Send Workflow Notification via Email 스위치**: 결재 요청 수신, 승인/반려 처리 등 워크플로우 관련 이벤트 발생 시 관련자에게 이메일 알림을 발송할지 여부를 선택합니다. 11. **Test 버튼**: SMTP 설정이 접속에 문제 없는지 확인합니다.
    - 12.
    SMTP 설정 팝업 다이얼로그
    diff --git a/confluence-mdx/tests/reverse-sync/798064641/page.v1.yaml b/confluence-mdx/tests/reverse-sync/798064641/page.v1.yaml index 4c08e8152..21f111c1f 100644 --- a/confluence-mdx/tests/reverse-sync/798064641/page.v1.yaml +++ b/confluence-mdx/tests/reverse-sync/798064641/page.v1.yaml @@ -1,5 +1,6 @@ id: '798064641' title: Email 연동 +expected_status: pass _links: base: https://querypie.atlassian.net/wiki webui: /spaces/QM/pages/798064641/Email diff --git a/confluence-mdx/tests/test_reverse_sync_patch_builder.py b/confluence-mdx/tests/test_reverse_sync_patch_builder.py index 7bbe231d7..7b0297ba5 100644 --- a/confluence-mdx/tests/test_reverse_sync_patch_builder.py +++ b/confluence-mdx/tests/test_reverse_sync_patch_builder.py @@ -2176,6 +2176,21 @@ def test_preserved_anchor_inside_strong_boundary_fixup_preserves_markup(self): assert 'link' in str(p) assert 'link:' in str(p) + def test_preserved_anchor_strong_added(self): + """preserved anchor가 있는 문단에서도 새 bold가 추가되어야 한다.""" + from bs4 import BeautifulSoup + xhtml = '
    • link Name

    ' + soup = BeautifulSoup(xhtml, 'html.parser') + fixups = [{ + 'old_plain': 'link Name', + 'new_plain': 'link Name', + 'new_inner_xhtml': 'link Name', + }] + _apply_inline_fixups(soup, fixups) + p = soup.find('p') + assert 'link' in str(p) + assert 'Name' in str(p) + def test_duplicate_text_uses_match_index(self): """동일 텍스트

    가 여러 개여도 지정한 occurrence에만 적용한다.""" from bs4 import BeautifulSoup @@ -2464,3 +2479,290 @@ def test_text_also_changed_no_extra_replace_fragment(self): assert len(rf_patches) == 0, ( f"Space+text change should not produce list replace_fragment: {rf_patches}" ) + + +# ── numbered list item 제거 시 XHTML 반영 실패 ── + + +class TestListItemRemovalWithPreservedAnchor: + """numbered list에서 빈 항목(12.)을 제거하고 콘텐츠를 이전 항목에 병합할 때 + preserved anchor()가 있는 리스트의 XHTML 패치가 누락되는 버그. + + 재현 시나리오 (page 798064641, integrating-with-email.mdx): + Original MDX: + 11. **Test 버튼** : SMTP 설정이 접속에 문제 없는지 확인합니다.
    + 12. +

    + Improved MDX: + 11. **Test 버튼**: SMTP 설정이 접속에 문제 없는지 확인합니다.
    +
    + + 현상: item 12의
  • 가 있어 preserved anchor로 분류 → + whole-fragment 교체가 차단되고, text-level 패치는 항목 구조 변경을 + 처리하지 못해 빈
  • 가 XHTML에 남음 → FC가 "12." 재생성. + """ + + def _setup_sidecar(self, xpath: str, mdx_idx: int): + entry = _make_sidecar(xpath, [mdx_idx]) + return {mdx_idx: entry} + + def test_list_item_removal_merged_into_previous_item(self): + """빈 리스트 항목(2.)을 제거하고 figure를 이전 항목(1.)에 병합하면 + XHTML 패치 적용 후 빈
  • 가 사라져야 한다. + + 최소 재현: 2개 항목의
      — item 1에 텍스트, item 2에 . + improved MDX에서 item 2를 제거하고 figure를 item 1에 병합. + + 현상: preserved anchor() 때문에 replace_fragment 차단, + text-level 패치만 적용 → 빈
    1. 제거 불가 → FC가 "2." 재생성 + """ + # XHTML:
        with 2 items, item 2 has (preserved anchor) + xhtml_text = ( + '
          ' + '
        1. Test 버튼 : 확인합니다.

        2. ' + '
        3. ' + '' + '

          캡션

          ' + '

        4. ' + '
        ' + ) + + list_mapping = _make_mapping( + 'list-1', + 'Test 버튼 : 확인합니다.캡션', + xpath='ol[1]', + type_='list', + ) + list_mapping.xhtml_text = xhtml_text + + mappings = [list_mapping] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + + # Original MDX: 2 items (item 2 is empty "2." followed by figure) + old_content = ( + '1. **Test 버튼** : 확인합니다.
        \n' + '2.\n' + '
        \n' + ' 캡션\n' + '
        \n' + ' 캡션\n' + '
        \n' + '
        \n' + ) + + # Improved MDX: item 2 removed, figure merged into item 1 + new_content = ( + '1. **Test 버튼**: 확인합니다.
        \n' + '
        \n' + ' 캡션\n' + '
        \n' + ' 캡션\n' + '
        \n' + '
        \n' + ) + + change = _make_change(0, old_content, new_content, type_='list') + mdx_to_sidecar = self._setup_sidecar('ol[1]', 0) + + sidecar_block = SidecarBlock( + 0, 'ol[1]', xhtml_text, sha256_text(xhtml_text), (1, 8), + ) + roundtrip_sidecar = _make_roundtrip_sidecar([sidecar_block]) + + patches, _, skipped = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar, + ) + + # 패치가 생성되어야 한다 + assert len(patches) >= 1, ( + f"리스트 항목 제거 변경에 대한 패치가 생성되어야 합니다. " + f"patches={patches}, skipped={skipped}" + ) + + # 핵심 검증: 패치를 XHTML에 적용한 후 빈
      1. 가 제거되어야 한다 + patched = patch_xhtml(xhtml_text, patches) + + from bs4 import BeautifulSoup + soup = BeautifulSoup(patched, 'html.parser') + ol = soup.find('ol') + assert ol is not None, "패치 후
          이 존재해야 합니다." + items = ol.find_all('li', recursive=False) + # item 2(빈 항목)가 제거되어 1개만 남아야 함 + assert len(items) == 1, ( + f"빈
        1. 항목이 제거되어 1개만 남아야 합니다. " + f"실제 항목 수: {len(items)}, patched XHTML: {patched[:300]}" + ) + + def test_middle_blank_item_removal_merges_that_item_only(self): + """중간 빈 항목이 제거되면 마지막 항목이 아니라 해당 항목만 이전 형제로 병합해야 한다.""" + xhtml_text = ( + '
            ' + '
          1. One

          2. ' + '
          3. ' + '' + '

            캡션

            ' + '

          4. ' + '
          5. Three

          6. ' + '
          ' + ) + + list_mapping = _make_mapping( + 'list-middle', + 'One캡션Three', + xpath='ol[1]', + type_='list', + ) + list_mapping.xhtml_text = xhtml_text + + mappings = [list_mapping] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + + old_content = ( + '1. One :\n' + '2.\n' + '
          \n' + ' 캡션\n' + '
          \n' + ' 캡션\n' + '
          \n' + '
          \n' + '3. Three\n' + ) + new_content = ( + '1. One:\n' + '
          \n' + ' 캡션\n' + '
          \n' + ' 캡션\n' + '
          \n' + '
          \n' + '2. Three\n' + ) + + change = _make_change(0, old_content, new_content, type_='list') + mdx_to_sidecar = self._setup_sidecar('ol[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'ol[1]', xhtml_text, sha256_text(xhtml_text), (1, 9)), + ]) + + patches, _, skipped = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar, + ) + + assert len(patches) >= 1, ( + f"중간 빈 항목 제거 변경에 대한 패치가 생성되어야 합니다. " + f"patches={patches}, skipped={skipped}" + ) + + patched = patch_xhtml(xhtml_text, patches) + + from bs4 import BeautifulSoup + soup = BeautifulSoup(patched, 'html.parser') + ol = soup.find('ol') + assert ol is not None + items = ol.find_all('li', recursive=False) + assert len(items) == 2, ( + f"중간 빈 항목만 제거되어 2개 항목이 남아야 합니다. " + f"실제 항목 수: {len(items)}, patched XHTML: {patched[:300]}" + ) + assert 'Three' in items[1].get_text(), ( + f"마지막 항목 텍스트가 보존되어야 합니다. patched XHTML: {patched[:300]}" + ) + assert items[0].find('ac:image') is not None, ( + f"제거된 빈 항목의 preserved anchor가 이전 항목으로 이동해야 합니다. " + f"patched XHTML: {patched[:300]}" + ) + + def test_nested_blank_item_removal_does_not_touch_other_nested_lists(self): + """같은 depth의 다른 하위 리스트는 건드리지 않고 제거된 경로만 병합해야 한다.""" + xhtml_text = ( + '
            ' + '
          1. Parent A

              ' + '
            1. Step A1

            2. ' + '
            3. ' + '' + '

              캡션A

              ' + '

            4. ' + '
          2. ' + '
          3. Parent B

              ' + '
            1. Step B1

            2. ' + '
            3. Step B2

            4. ' + '
          4. ' + '
          ' + ) + + list_mapping = _make_mapping( + 'list-nested', + 'Parent AStep A1캡션AParent BStep B1Step B2', + xpath='ol[1]', + type_='list', + ) + list_mapping.xhtml_text = xhtml_text + + mappings = [list_mapping] + xpath_to_mapping = {m.xhtml_xpath: m for m in mappings} + + old_content = ( + '1. Parent A\n' + ' 1. Step A1 :\n' + ' 2.\n' + '
          \n' + ' 캡션A\n' + '
          \n' + ' 캡션A\n' + '
          \n' + '
          \n' + '2. Parent B\n' + ' 1. Step B1\n' + ' 2. Step B2\n' + ) + new_content = ( + '1. Parent A\n' + ' 1. Step A1:\n' + '
          \n' + ' 캡션A\n' + '
          \n' + ' 캡션A\n' + '
          \n' + '
          \n' + '2. Parent B\n' + ' 1. Step B1\n' + ' 2. Step B2\n' + ) + + change = _make_change(0, old_content, new_content, type_='list') + mdx_to_sidecar = self._setup_sidecar('ol[1]', 0) + roundtrip_sidecar = _make_roundtrip_sidecar([ + SidecarBlock(0, 'ol[1]', xhtml_text, sha256_text(xhtml_text), (1, 12)), + ]) + + patches, _, skipped = build_patches( + [change], [change.old_block], [change.new_block], + mappings, mdx_to_sidecar, xpath_to_mapping, + roundtrip_sidecar=roundtrip_sidecar, + ) + + assert len(patches) >= 1, ( + f"중첩 리스트 항목 제거 변경에 대한 패치가 생성되어야 합니다. " + f"patches={patches}, skipped={skipped}" + ) + + patched = patch_xhtml(xhtml_text, patches) + + from bs4 import BeautifulSoup + soup = BeautifulSoup(patched, 'html.parser') + root_items = soup.find('ol').find_all('li', recursive=False) + first_nested = root_items[0].find('ol') + second_nested = root_items[1].find('ol') + assert first_nested is not None and second_nested is not None + assert len(first_nested.find_all('li', recursive=False)) == 1, ( + f"첫 번째 하위 리스트만 1개 항목으로 줄어야 합니다. patched XHTML: {patched[:400]}" + ) + assert len(second_nested.find_all('li', recursive=False)) == 2, ( + f"변경되지 않은 두 번째 하위 리스트는 2개 항목을 유지해야 합니다. " + f"patched XHTML: {patched[:400]}" + )