OpenKnowledgeMaps · andreishket · Feb 24, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 24, 2026
diff --git a/server/workers/base/src/base.py b/server/workers/base/src/base.py
@@ -251,6 +251,7 @@ def filter_duplicates(df, service, params):
     df["doi_duplicate"] = False
     df["has_relations"] = False
     df["link_duplicate"] = False
+    df["pdf_link_candidates_from_duplicates"] = ""
     df["duplicates"] = df.apply(
         lambda x: ",".join([x["id"], x["duplicates"]])
         if len(x["duplicates"].split(",")) >= 1

diff --git a/server/workers/common/common/enrichment.py b/server/workers/common/common/enrichment.py
@@ -261,7 +261,9 @@ def apply_oa_state_improvements(df, anchor_idx, accumulator):
 
 def apply_link_improvements(df, anchor_idx, all_links):
     """
-    Applies improvements for link to the anchor element.
+    Applies link improvements to the anchor element: stores the links found
+    on its duplicates (excluding the anchor's own link) in the
+    pdf_link_candidates_from_duplicates column for use in PDF lookup.
 
     Args:
         df: DataFrame with data
@@ -271,5 +273,20 @@ def apply_link_improvements(df, anchor_idx, all_links):
     if all_links:
         unique_links = deduplicate_links(all_links)
         if unique_links:
-            merged_links = '; '.join(sorted(unique_links))
-            df.loc[anchor_idx, 'link'] = merged_links
+            anchor_link = get_anchor_field_value(df, anchor_idx, 'link')
+            unique_links_without_anchor_link = [x for x in unique_links if x != anchor_link]
+
+            merged_links = '; '.join(sorted(unique_links_without_anchor_link))
+            df.loc[anchor_idx, 'pdf_link_candidates_from_duplicates'] = merged_links
+
+def get_anchor_field_value(df, anchor_idx, column_name):
+    """
+    Looks up column_name on the anchor row. Yields None when the column
+    is absent from df or when the stored cell is NaN or an empty string.
+    """
+    if column_name in df.columns:
+        cell = df.loc[anchor_idx, column_name]
+        # Only hand back cells that carry an actual value.
+        if not (pd.isna(cell) or cell == ''):
+            return cell
+    return None
diff --git a/server/workers/orcid/src/orcid_service.py b/server/workers/orcid/src/orcid_service.py
@@ -400,7 +400,7 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
                        'relations', 'annotations', 'repo', 'source', 'volume', 'issue', 'page', 'issn', 
                        'citation_count', 'cited_by_wikipedia_count', 'cited_by_msm_count', 'cited_by_policies_count', 
                        'cited_by_patents_count', 'cited_by_accounts_count', 'cited_by_fbwalls_count',
-                       'merged_dois',
+                       'merged_dois', 'pdf_link_candidates_from_duplicates',
                         'cited_by_feeds_count',
                         'cited_by_gplus_count',
                         'cited_by_rdts_count',

diff --git a/vis/js/dataprocessing/managers/DataManager.ts b/vis/js/dataprocessing/managers/DataManager.ts
@@ -14,6 +14,7 @@ import {
   getListLink,
   getOpenAccessLink,
   getOutlink,
+  getPdfLinkCandidatesFromDuplicates,
   getValueOrZero,
   getVisibleMetric,
   isOpenAccess,
@@ -257,6 +258,9 @@ class DataManager {
     paper.oa_link = getOpenAccessLink(paper, this.config);
     paper.outlink = getOutlink(paper, this.config);
     paper.list_link = getListLink(paper, this.config, this.context);
+
+    paper.pdf_link_candidates_from_duplicates =
+      getPdfLinkCandidatesFromDuplicates(paper);
   }
 
   __parseComments(paper: any) {

diff --git a/vis/js/types/models/paper.ts b/vis/js/types/models/paper.ts
@@ -68,6 +68,8 @@ export interface CommonPaperDataForAllIntegrations {
   zoomedY: number;
   zoomedWidth: number;
   zoomedHeight: number;
+
+  pdf_link_candidates_from_duplicates: string[] | null;
 }
 
 export interface PubmedPaper extends CommonPaperDataForAllIntegrations {

diff --git a/vis/js/utils/data.ts b/vis/js/utils/data.ts
@@ -285,6 +285,25 @@ export const getListLink = (paper, config, context) => {
   return {};
 };
 
+/**
+ * Parses the paper's pdf link candidates from duplicates into an array of strings.
+ * The backend joins candidates with "; ", so entries are trimmed and blanks dropped.
+ *
+ * @param {object} paper paper object
+ *
+ * @returns array of trimmed links, or null if no candidates are found
+ */
+export const getPdfLinkCandidatesFromDuplicates = (paper): string[] | null => {
+  const raw = paper.pdf_link_candidates_from_duplicates;
+  if (typeof raw !== "string" || !raw) {
+    return null;
+  }
+
+  // Trim so the space after each ";" is not URL-encoded into the link later.
+  const links = raw.split(";").map((x: string) => x.trim()).filter(Boolean);
+  return links.length > 0 ? links : null;
+};
+
 /**
  * Parses the paper's authors string into an object array.
  *

diff --git a/vis/js/utils/usePdfLookup.ts b/vis/js/utils/usePdfLookup.ts
@@ -50,6 +50,12 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
       let possiblePDFs = "";
       let fallbackUrl = "";
       if (service === "base") {
+        let pdfLinkCandidatesFromDuplicates = null;
+
+        if ("pdf_link_candidates_from_duplicates" in paper) {
+          pdfLinkCandidatesFromDuplicates = paper.pdf_link_candidates_from_duplicates as string[] | null;
+        }
+
         possiblePDFs =
           encodeURIComponent(paper.link) +
           ";" +
@@ -59,6 +65,10 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
             .split("; ")
             .map((x) => encodeURIComponent(x))
             .join("; ");
+
+        if (pdfLinkCandidatesFromDuplicates) {
+          possiblePDFs += ";" + pdfLinkCandidatesFromDuplicates.map((x: string) => encodeURIComponent(x)).join("; ");
+        }
       }
 
       if (service === "openaire") {

diff --git a/vis/test/data/papers.ts b/vis/test/data/papers.ts
@@ -55,6 +55,8 @@ const MOCK_COMMON_PAPER_DATA: CommonPaperDataForAllIntegrations = {
   zoomedY: 1,
   zoomedWidth: 1,
   zoomedHeight: 1,
+
+  pdf_link_candidates_from_duplicates: null,
 };
 
 export const MOCK_BASE_PAPER_DATA: BasePaper = {