Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions server/workers/base/src/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def filter_duplicates(df, service, params):
df["doi_duplicate"] = False
df["has_relations"] = False
df["link_duplicate"] = False
df["pdf_link_candidates_from_duplicates"] = ""
df["duplicates"] = df.apply(
lambda x: ",".join([x["id"], x["duplicates"]])
if len(x["duplicates"].split(",")) >= 1
Expand Down
23 changes: 20 additions & 3 deletions server/workers/common/common/enrichment.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,9 @@ def apply_oa_state_improvements(df, anchor_idx, accumulator):

def apply_link_improvements(df, anchor_idx, all_links):
"""
Applies improvements for link to the anchor element.
Applies improvements for link to the anchor element: set in
pdf_link_candidates_from_duplicates column if there are any links
from duplicates that can be used for PDF lookup.

Args:
df: DataFrame with data
Expand All @@ -271,5 +273,20 @@ def apply_link_improvements(df, anchor_idx, all_links):
if all_links:
unique_links = deduplicate_links(all_links)
if unique_links:
merged_links = '; '.join(sorted(unique_links))
df.loc[anchor_idx, 'link'] = merged_links
anchor_link = get_anchor_field_value(df, anchor_idx, 'link')
unique_links_without_anchor_link = [x for x in unique_links if x != anchor_link]

merged_links = '; '.join(sorted(unique_links_without_anchor_link))
df.loc[anchor_idx, 'pdf_link_candidates_from_duplicates'] = merged_links

def get_anchor_field_value(df, anchor_idx, column_name):
    """
    Looks up a single cell of the anchor row.

    Args:
        df: DataFrame with data
        anchor_idx: index label of the anchor row
        column_name: name of the column to read

    Returns:
        The cell value, or None when the column does not exist in df or
        the cell holds NaN/None or an empty string.
    """
    if column_name in df.columns:
        cell = df.loc[anchor_idx, column_name]
        # Missing values and empty strings are both reported as "no value".
        is_blank = pd.isna(cell) or cell == ''
        if not is_blank:
            return cell
    return None
2 changes: 1 addition & 1 deletion server/workers/orcid/src/orcid_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
'relations', 'annotations', 'repo', 'source', 'volume', 'issue', 'page', 'issn',
'citation_count', 'cited_by_wikipedia_count', 'cited_by_msm_count', 'cited_by_policies_count',
'cited_by_patents_count', 'cited_by_accounts_count', 'cited_by_fbwalls_count',
'merged_dois',
'merged_dois', 'pdf_link_candidates_from_duplicates',
'cited_by_feeds_count',
'cited_by_gplus_count',
'cited_by_rdts_count',
Expand Down
4 changes: 4 additions & 0 deletions vis/js/dataprocessing/managers/DataManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
getListLink,
getOpenAccessLink,
getOutlink,
getPdfLinkCandidatesFromDuplicates,
getValueOrZero,
getVisibleMetric,
isOpenAccess,
Expand Down Expand Up @@ -257,6 +258,9 @@ class DataManager {
paper.oa_link = getOpenAccessLink(paper, this.config);
paper.outlink = getOutlink(paper, this.config);
paper.list_link = getListLink(paper, this.config, this.context);

paper.pdf_link_candidates_from_duplicates =
getPdfLinkCandidatesFromDuplicates(paper);
}

__parseComments(paper: any) {
Expand Down
2 changes: 2 additions & 0 deletions vis/js/types/models/paper.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ export interface CommonPaperDataForAllIntegrations {
zoomedY: number;
zoomedWidth: number;
zoomedHeight: number;

pdf_link_candidates_from_duplicates: string[] | null;
}

export interface PubmedPaper extends CommonPaperDataForAllIntegrations {
Expand Down
19 changes: 19 additions & 0 deletions vis/js/utils/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -285,6 +285,25 @@ export const getListLink = (paper, config, context) => {
return {};
};

/**
 * Parses the paper's pdf link candidates from duplicates into an array of strings.
 *
 * The raw value is expected to be a single string in which the individual
 * links are separated by "; " (semicolon followed by a space), the same
 * separator used for the other multi-link fields. Splitting on that exact
 * separator keeps URLs that themselves contain a ";" (e.g. ";jsessionid=")
 * intact. Each entry is trimmed and blank entries are dropped, so the
 * returned links never carry stray whitespace into URL encoding.
 *
 * @param {object} paper paper object
 *
 * @returns array of non-empty, trimmed links, or null if the field is not a
 * non-empty string or contains no usable link
 */
export const getPdfLinkCandidatesFromDuplicates = (paper): string[] | null => {
  const raw = paper.pdf_link_candidates_from_duplicates;

  if (typeof raw !== "string" || !raw) {
    return null;
  }

  const links = raw
    .split("; ")
    .map((link) => link.trim())
    // split() always yields at least one element, so blanks must be filtered
    // explicitly for the "no candidates" case to be detectable.
    .filter((link) => link.length > 0);

  return links.length > 0 ? links : null;
};

/**
* Parses the paper's authors string into an object array.
*
Expand Down
10 changes: 10 additions & 0 deletions vis/js/utils/usePdfLookup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
let possiblePDFs = "";
let fallbackUrl = "";
if (service === "base") {
let pdfLinkCandidatesFromDuplicates = null;

if ("pdf_link_candidates_from_duplicates" in paper) {
pdfLinkCandidatesFromDuplicates = paper.pdf_link_candidates_from_duplicates as string[] | null;
}

possiblePDFs =
encodeURIComponent(paper.link) +
";" +
Expand All @@ -59,6 +65,10 @@ const usePdfLookup = (paper: Paper, serverUrl: string, service: string) => {
.split("; ")
.map((x) => encodeURIComponent(x))
.join("; ");

if (pdfLinkCandidatesFromDuplicates) {
possiblePDFs += ";" + pdfLinkCandidatesFromDuplicates.map((x: string) => encodeURIComponent(x)).join("; ");
}
}

if (service === "openaire") {
Expand Down
2 changes: 2 additions & 0 deletions vis/test/data/papers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ const MOCK_COMMON_PAPER_DATA: CommonPaperDataForAllIntegrations = {
zoomedY: 1,
zoomedWidth: 1,
zoomedHeight: 1,

pdf_link_candidates_from_duplicates: null,
};

export const MOCK_BASE_PAPER_DATA: BasePaper = {
Expand Down