From ae40b1e077bbece497deb76765154017c2ccec58 Mon Sep 17 00:00:00 2001 From: WingEdge777 Date: Sun, 5 Apr 2026 10:59:48 +0800 Subject: [PATCH 1/5] feature : remove zhihu zhida hyper link --- packages/core/src/adapters/platforms/zhihu.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/packages/core/src/adapters/platforms/zhihu.ts b/packages/core/src/adapters/platforms/zhihu.ts index c666414b..e3738ac2 100644 --- a/packages/core/src/adapters/platforms/zhihu.ts +++ b/packages/core/src/adapters/platforms/zhihu.ts @@ -213,6 +213,12 @@ export class ZhihuAdapter extends CodeAdapter { result = result.replace(/\s*data-(?!draft)[a-z-]+="[^"]*"/gi, '') result = result.replace(/\s*style="[^"]*"/gi, '') + // 5. 去除知乎站内知识图谱链接,仅保留链接文本 + result = result.replace( + /]*href="https?:\/\/zhida\.zhihu\.com\/[^"]*"[^>]*>([\s\S]*?)<\/a>/gi, + '$1' + ) + return result } From 55616b79ec66bba2d7e2c1c69a26bc69e32e648b Mon Sep 17 00:00:00 2001 From: WingEdge777 Date: Sun, 5 Apr 2026 11:24:54 +0800 Subject: [PATCH 2/5] feature : remove zhihu zhida hyper link --- packages/core/src/adapters/platforms/zhihu.ts | 8 ++----- packages/core/src/adapters/types.ts | 3 +++ .../extension/src/lib/content-processor.ts | 24 +++++++++++++++++++ 3 files changed, 29 insertions(+), 6 deletions(-) diff --git a/packages/core/src/adapters/platforms/zhihu.ts b/packages/core/src/adapters/platforms/zhihu.ts index e3738ac2..1079e8db 100644 --- a/packages/core/src/adapters/platforms/zhihu.ts +++ b/packages/core/src/adapters/platforms/zhihu.ts @@ -38,6 +38,8 @@ export class ZhihuAdapter extends CodeAdapter { removeEmptyLines: true, removeEmptyDivs: true, removeNestedEmptyContainers: true, + // 去除知乎站内知识图谱链接,仅保留链接文本 + unwrapInternalLinks: ['zhida.zhihu.com'], } /** 知乎 API 需要的 Header 规则 */ @@ -213,12 +215,6 @@ export class ZhihuAdapter extends CodeAdapter { result = result.replace(/\s*data-(?!draft)[a-z-]+="[^"]*"/gi, '') result = result.replace(/\s*style="[^"]*"/gi, '') - // 5. 去除知乎站内知识图谱链接,仅保留链接文本 - result = result.replace( - /]*href="https?:\/\/zhida\.zhihu\.com\/[^"]*"[^>]*>([\s\S]*?)<\/a>/gi, - '$1' - ) - return result } diff --git a/packages/core/src/adapters/types.ts b/packages/core/src/adapters/types.ts index 73c46027..e2b78f7c 100644 --- a/packages/core/src/adapters/types.ts +++ b/packages/core/src/adapters/types.ts @@ -69,6 +69,9 @@ export interface PreprocessConfig { /** 将表格转换为文本(用 | 分隔列,适用于不支持表格的平台) */ convertTablesToText?: boolean + /** 去除源平台站内链接(如知乎知识图谱),仅保留链接文本 */ + unwrapInternalLinks?: string[] + /** 保留