-
Notifications
You must be signed in to change notification settings - Fork 8
feat(website, backend): SeqSet citation tracking #6304
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c327cf1
5e669f6
2f82180
9497e96
8c831f0
5730e79
ab09d96
e23931d
05bcef9
27470df
2c67ebc
b891a49
e38caa8
d3b5500
9199679
f0a2c64
9e865b0
b8c8d18
5c20008
a98c60d
0890599
c9ab8a3
c31180a
2e0a647
5d20ffa
c136152
f35d284
cc86479
cb53341
588f7bb
040a7a9
f1c5c6a
ddfd5c1
55c8a4d
1562e46
f5c39ba
766a462
814564e
69b1e16
162d834
bacb448
f1eca3c
5ac697f
d728ae5
90e27c8
e3ae3f5
978e375
ca32d4d
f04e67e
d16b13c
ce4ffae
77e25dd
5e939b9
ed87cfc
4dde77f
9a56335
64215e0
2d8c1c5
9300db2
7b6d90c
4c1d82c
112f49e
c14e35d
cf19235
0b6d55f
153cdac
53e0eae
f35ab9d
c686b17
04317b6
3ad141a
8e0ff24
65229a7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2,11 +2,17 @@ package org.loculus.backend.service.crossref | |
|
|
||
| import mu.KotlinLogging | ||
| import org.jsoup.Jsoup | ||
| import org.jsoup.parser.Parser | ||
| import org.loculus.backend.api.CitationContributor | ||
| import org.loculus.backend.api.CitationSource | ||
| import org.loculus.backend.api.SeqSetCitationSource | ||
| import org.loculus.backend.utils.DateProvider | ||
| import org.redundent.kotlin.xml.PrintOptions | ||
| import org.redundent.kotlin.xml.xml | ||
| import org.springframework.boot.context.properties.ConfigurationProperties | ||
| import org.springframework.stereotype.Service | ||
| import java.io.DataOutputStream | ||
| import java.io.IOException | ||
| import java.io.OutputStreamWriter | ||
| import java.io.PrintWriter | ||
| import java.net.HttpURLConnection | ||
|
|
@@ -40,7 +46,7 @@ data class DoiEntry( | |
| ) | ||
|
|
||
| @Service | ||
| class CrossRefService(final val properties: CrossRefServiceProperties) { | ||
| class CrossRefService(private val properties: CrossRefServiceProperties, private val dateProvider: DateProvider) { | ||
| val isActive = properties.endpoint != null && | ||
| properties.username != null && | ||
| properties.password != null && | ||
|
|
@@ -49,16 +55,106 @@ class CrossRefService(final val properties: CrossRefServiceProperties) { | |
| properties.email != null && | ||
| properties.organization != null && | ||
| properties.hostUrl != null | ||
| val dateTimeFormatterMM = DateTimeFormatter.ofPattern("MM") | ||
| val dateTimeFormatterdd = DateTimeFormatter.ofPattern("dd") | ||
| val dateTimeFormatteryyyy = DateTimeFormatter.ofPattern("yyyy") | ||
| val doiPrefix: String? = properties.doiPrefix | ||
| val dateTimeFormatterMM: DateTimeFormatter = DateTimeFormatter.ofPattern("MM") | ||
| val dateTimeFormatterdd: DateTimeFormatter = DateTimeFormatter.ofPattern("dd") | ||
| val dateTimeFormatteryyyy: DateTimeFormatter = DateTimeFormatter.ofPattern("yyyy") | ||
|
|
||
/**
 * Guard called at the start of the service's public operations.
 *
 * @throws RuntimeException if [isActive] is false, i.e. the service has not been configured.
 */
private fun checkIsActive() {
    if (isActive) return
    throw RuntimeException("The CrossRefService is not active as it has not been configured.")
}
|
|
||
/**
 * Converts a CrossRef cited-by XML document into a list of citation sources.
 *
 * The document root must be `crossref_result`. Every `forward_link` element below it yields one
 * [SeqSetCitationSource] whose `seqSetDOIs` contains exactly the `doi` attribute of that link.
 * The citing work is read from the first child element of the link (`doi`, `title`, `year` and
 * any `contributor` entries).
 *
 * @param citedByXML the raw XML text to parse.
 * @return one entry per `forward_link`, in document order.
 * @throws IllegalStateException if the XML is malformed, the root element is not
 *   `crossref_result`, or a link lacks its DOI attribute, citation element, source DOI,
 *   title, or a numeric year.
 */
fun parseCrossRefCitedByXML(citedByXML: String): List<SeqSetCitationSource> {
    // Track a single error: any parse error at all is enough to reject the document.
    val xmlParser = Parser.xmlParser().setTrackErrors(1)
    val document = Jsoup.parse(citedByXML, "", xmlParser)
    check(xmlParser.errors.isEmpty()) { "Invalid XML: ${xmlParser.errors}" }

    val root = document.children().firstOrNull()
    if (root == null || root.tagName() != "crossref_result") {
        error("Invalid CrossRef root element: ${root?.tagName()}")
    }

    return root.select("forward_link").map { link ->
        val seqSetDOI = link.attr("doi").takeUnless { it.isBlank() }
            ?: error("CrossRef forward_link missing SeqSet DOI: $link")

        val citation = link.children().firstOrNull()
            ?: error("CrossRef forward_link has no citation element under SeqSet $seqSetDOI: $link")

        val sourceDOI = citation.selectFirst("doi")?.text()?.takeUnless { it.isBlank() }
            ?: error("CrossRef citation source missing DOI for SeqSet $seqSetDOI: $citation")

        val title = citation.selectFirst("title")?.text()?.takeUnless { it.isBlank() }
            ?: error("CrossRef citation source missing title for SeqSet $seqSetDOI: $citation")

        val year = citation.selectFirst("year")?.text()?.toIntOrNull()
            ?: error("CrossRef citation source missing or non-numeric year for SeqSet $seqSetDOI: $citation")

        // Contributors with neither a given name nor a surname carry no information and are dropped.
        val contributors = citation.select("contributor")
            .map { contributor ->
                val givenName = contributor.selectFirst("given_name")?.text().orEmpty()
                val surname = contributor.selectFirst("surname")?.text().orEmpty()
                givenName to surname
            }
            .filterNot { (givenName, surname) -> givenName.isEmpty() && surname.isEmpty() }
            .map { (givenName, surname) -> CitationContributor(givenName, surname) }

        val source = CitationSource(
            sourceDOI = sourceDOI,
            title = title,
            year = year,
            contributors = contributors,
        )
        SeqSetCitationSource(source = source, seqSetDOIs = setOf(seqSetDOI))
    }
}
|
|
||
/**
 * Requests the cited-by (forward link) records for [doiPrefix] from the configured CrossRef
 * endpoint and parses the XML response with [parseCrossRefCitedByXML].
 *
 * The request is a GET to `<endpoint>/servlet/getForwardLinks`, authenticated with the configured
 * username and password, bounded by the current date as `endDate`, and with
 * `include_postedcontent=true`.
 *
 * @param doiPrefix value sent as the `doi` query parameter.
 * @return the citation sources parsed from the response.
 * @throws RuntimeException if the service is not configured, the endpoint answers with a status
 *   other than 200, the request fails with an I/O error, or the response cannot be parsed.
 */
fun getCrossRefCitedBy(doiPrefix: String): List<SeqSetCitationSource> {
    checkIsActive()

    // End date is the current date at time of request
    val endDate = dateProvider.getCurrentDate()

    // Every query value is percent-encoded. Interpolating the raw username, password or DOI into
    // the URL would corrupt the request (or make URI parsing fail) as soon as one of them
    // contains a reserved character such as '&', '#', '+', '%' or a space.
    val query = listOf(
        "usr" to properties.username,
        "pwd" to properties.password,
        "doi" to doiPrefix,
        "endDate" to endDate,
        "include_postedcontent" to true,
    ).joinToString("&") { (key, value) ->
        "$key=${java.net.URLEncoder.encode(value.toString(), "UTF-8")}"
    }

    val connection = URI("${properties.endpoint}/servlet/getForwardLinks?$query")
        .toURL()
        .openConnection() as HttpURLConnection

    connection.connectTimeout = 10_000
    connection.readTimeout = 30_000
    connection.requestMethod = "GET"

    val response = try {
        val responseCode = connection.responseCode
        if (responseCode != HttpURLConnection.HTTP_OK) {
            throw RuntimeException("CrossRef citedBy request returned $responseCode")
        }
        connection.inputStream.use { String(it.readAllBytes()) }
    } catch (e: IOException) {
        throw RuntimeException("CrossRef citedBy request failed for DOI $doiPrefix", e)
    } finally {
        // Always release the connection, whether the request succeeded or not.
        connection.disconnect()
    }

    return try {
        parseCrossRefCitedByXML(response)
    } catch (e: Exception) {
        throw RuntimeException("Failed to parse CrossRef citedBy response for DOI $doiPrefix", e)
    }
}
|
|
||
| fun generateCrossRefXML(entry: DoiEntry): String { | ||
| checkIsActive() | ||
|
|
||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Similar to my comment below, to me the thing that people contribute to is the Publication (or CitingSource etc.), not to the act of citing. The citation is a connection between two publications, not really its own entity
(of course, feel free to disagree if you think I'm bikeshedding)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I agree: in the current layout this one should be `CitingSourceContributor`.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
IMO, now with `CitationSource`, this one can just stay as `CitationContributor`.