From 4a5e99f44998a13a54d0266b5aa50805e87f55cf Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Mon, 18 Sep 2023 21:18:23 +0200 Subject: [PATCH 1/7] Patents jsonschema --- iroko/patents/__init__.py | 16 ++ iroko/patents/api.py | 129 ++++++++++ iroko/patents/cli.py | 27 +++ iroko/patents/ext.py | 28 +++ iroko/patents/fixtures.py | 103 ++++++++ iroko/patents/jsonschemas/__init__.py | 22 ++ .../jsonschemas/patents/patent-v1.0.0.json | 226 ++++++++++++++++++ iroko/patents/loaders/__init__.py | 33 +++ iroko/patents/mappings/__init__.py | 21 ++ iroko/patents/mappings/v6/__init__.py | 15 ++ .../mappings/v6/persons/patent-v1.0.0.json | 147 ++++++++++++ iroko/patents/marshmallow/__init__.py | 20 ++ iroko/patents/marshmallow/json.py | 118 +++++++++ iroko/patents/permissions.py | 27 +++ iroko/patents/rest.py | 85 +++++++ iroko/patents/search.py | 19 ++ iroko/patents/serializers/__init__.py | 41 ++++ iroko/patents/utils.py | 65 +++++ iroko/pidstore/pids.py | 7 +- 19 files changed, 1148 insertions(+), 1 deletion(-) create mode 100755 iroko/patents/__init__.py create mode 100755 iroko/patents/api.py create mode 100644 iroko/patents/cli.py create mode 100644 iroko/patents/ext.py create mode 100644 iroko/patents/fixtures.py create mode 100644 iroko/patents/jsonschemas/__init__.py create mode 100644 iroko/patents/jsonschemas/patents/patent-v1.0.0.json create mode 100644 iroko/patents/loaders/__init__.py create mode 100644 iroko/patents/mappings/__init__.py create mode 100644 iroko/patents/mappings/v6/__init__.py create mode 100644 iroko/patents/mappings/v6/persons/patent-v1.0.0.json create mode 100644 iroko/patents/marshmallow/__init__.py create mode 100644 iroko/patents/marshmallow/json.py create mode 100644 iroko/patents/permissions.py create mode 100644 iroko/patents/rest.py create mode 100644 iroko/patents/search.py create mode 100644 iroko/patents/serializers/__init__.py create mode 100644 iroko/patents/utils.py diff --git a/iroko/patents/__init__.py b/iroko/patents/__init__.py 
new file mode 100755
index 00000000..36c59dd1
--- /dev/null
+++ b/iroko/patents/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+
+"""iroko."""
+
+from __future__ import absolute_import, print_function
+
+from .ext import IrokoPatents
+
+__all__ = ('IrokoPatents', )
diff --git a/iroko/patents/api.py b/iroko/patents/api.py
new file mode 100755
index 00000000..c7619939
--- /dev/null
+++ b/iroko/patents/api.py
@@ -0,0 +1,152 @@
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+import json
+
+from elasticsearch.exceptions import NotFoundError
+from invenio_pidstore.resolver import Resolver
+from invenio_pidstore.models import PersistentIdentifier
+from invenio_indexer.api import RecordIndexer
+
+from iroko.api import IrokoBaseRecord
+from iroko.organizations.api import OrganizationRecord
+from iroko.persons.api import PersonRecord
+from iroko.pidstore import pids
+from iroko.utils import remove_nulls
+from iroko.pidstore.pids import (
+    IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas,
+    )
+
+
+class PatentRecord (IrokoBaseRecord):
+    """Patent record bound to the ``patents/patent-v1.0.0.json`` schema."""
+
+    _schema = "patents/patent-v1.0.0.json"
+
+    @classmethod
+    def load_from_json_file(cls, file_path, per_pid=None):
+        """Bulk import patents from a JSON file (SPI format expected).
+
+        :param file_path: path of a JSON file holding a list of patent dicts.
+        :param per_pid: optional; accepted because the CLI command and the
+            upload view pass a person id positionally. It is not used.
+        """
+        with open(file_path) as _file:
+            patents = json.load(_file, object_hook=remove_nulls)
+        for count, data in enumerate(patents, start=1):
+            patent = PatentRecord(data)
+            # '_id' is dropped before storing; tolerate entries without it.
+            patent.pop('_id', None)
+            print(patent)
+            patentRecord, msg = cls.resolve_and_update(data=patent)
+            print(patentRecord)
+            if not patentRecord:
+                print("no pids found, creating patent")
+                patentRecord = cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE)
+                msg = 'created'
+            print('====================================', count)
+
+    @classmethod
+    def get_pat_by_pid(cls, pid_value, with_deleted=False):
+        """Return the patent stored under ``pid_value``.
+
+        ``with_deleted`` is accepted but not forwarded to the lookup.
+        """
+        return cls.get_record_by_pid_value(pid_value)
+
+    @classmethod
+    def create_or_update(cls, pat_uuid, data, **kwargs):
+        """Update the patent identified by ``pat_uuid``, or create it.
+
+        :returns: ``(record, msg)``; ``msg`` is ``'created'`` for a new record,
+            otherwise the message returned by ``resolve_and_update``.
+        """
+        pat, msg = cls.resolve_and_update(pat_uuid, data)
+        # A falsy record means nothing was resolved, so create a new one.
+        if not pat:
+            print("no pids found, creating patent")
+            pat = cls.create(data, iroko_pid_type=pids.PATENT_PID_TYPE,
+                             iroko_pid_value=pat_uuid)
+            msg = 'created'
+
+        return pat, msg
+
+    @classmethod
+    def delete(cls, data, vendor=None, delindex=True, force=False):
+        """Delete a patent record, its PID and (optionally) its index entry.
+
+        :param data: mapping holding the record id under ``cls.pid_uuid_field``.
+        :param vendor: unused.
+        :param delindex: when true, also remove the record from the index.
+        :param force: forwarded to ``record.delete``.
+        :returns: whatever ``record.delete`` returns.
+        """
+        pid = data.get(cls.pid_uuid_field)
+        assert pid
+        record = cls.get_record_by_pid_value(pid)
+        if not isinstance(pid, PersistentIdentifier):
+            # A plain identifier value has no delete(); fetch the PID row.
+            # NOTE(review): assumes it is registered as PATENT_PID_TYPE, the
+            # type used by create_or_update() -- confirm.
+            pid = PersistentIdentifier.get(pids.PATENT_PID_TYPE, pid)
+        pid.delete()
+        # NOTE(review): if ``record`` is an instance of this class, this
+        # attribute lookup resolves to this classmethod -- confirm the
+        # instance-level delete of the base record is what actually runs.
+        result = record.delete(force=force)
+        if delindex:
+            try:
+                RecordIndexer().delete(record)
+            except NotFoundError:
+                # The record was not indexed; nothing to remove.
+                pass
+        return result
+
+
+def fixture_spi_fields(person: PersonRecord, org: OrganizationRecord):
+    """Adapt SPI human-resources person data to the iroko person layout.
+
+    Sets ``country`` from the first address of ``org`` (Cuba by default),
+    registers the e-mail addresses, copies ``lastName`` to ``last_name`` and
+    rewrites ``noCi``/``idExpediente`` identifiers as ``dni``/``hrid`` ones.
+    ``person`` is modified in place and returned.
+    """
+    country_code = 'cu'
+    country = 'Cuba'
+    if 'addresses' in org and len(org['addresses']) > 0:
+        country_code = org['addresses'][0]['country_code']
+        country = org['addresses'][0]['country']
+    person['country'] = {'code': country_code, 'name': country}
+
+    if 'institutional_email' in person and len(person['institutional_email']) > 0:
+        person.add_email_address(person['institutional_email'])
+    if 'emails' in person:
+        for ma in person['emails']:
+            # Add each listed address, not the institutional one again.
+            person.add_email_address(ma)
+    if 'lastName' in person:
+        person['last_name'] = person['lastName']
+
+    # These source keys are optional (see the guards above).
+    person.pop('lastName', None)
+    person.pop('institutional_email', None)
+    person.pop('emails', None)
+
+    new_identifiers = []
+    for identifier in person[pids.IDENTIFIERS_FIELD]:
+        if identifier['idtype'] == 'noCi':
+            new_identifiers.append({
+                'idtype': 'dni',
+                'value': 'dni:' + country_code + '.' + identifier['idvalue'],
+                })
+        elif identifier['idtype'] == 'idExpediente':
+            new_identifiers.append({
+                'idtype': 'hrid',
+                'value': 'hrid:' + str(org.id) + '.' + identifier['idvalue'],
+                })
+        else:
+            new_identifiers.append(identifier)
+    person[pids.IDENTIFIERS_FIELD] = new_identifiers
+    return person
diff --git a/iroko/patents/cli.py b/iroko/patents/cli.py
new file mode 100644
index 00000000..a2c0e21f
--- /dev/null
+++ b/iroko/patents/cli.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+import os
+
+import click
+from flask import current_app
+from flask.cli import with_appcontext
+
+from iroko.patents.api import PatentRecord
+
+
+@click.group()
+def patents():
+    """Command related to patents iroko data."""
+
+
+@patents.command()
+@click.argument('perid')
+@with_appcontext
+def import_from_file(perid):
+    """Load patents from data/patents/patents.json; PERID goes to the loader."""
+
+    datadir = current_app.config['IROKO_DATA_DIRECTORY']
+    file_path = os.path.join(datadir, 'patents', 'patents.json')
+    PatentRecord.load_from_json_file(file_path, perid)
diff --git a/iroko/patents/ext.py b/iroko/patents/ext.py
new file mode 100644
index 00000000..5a63e142
--- /dev/null
+++ b/iroko/patents/ext.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+
+"""Flask extension for Iroko Patents."""
+
+from __future__ import absolute_import, print_function
+
+from iroko.patents.cli import patents
+
+
+class IrokoPatents(object):
+    """Flask extension that registers the ``patents`` CLI group."""
+
+    def __init__(self, app=None):
+        """Initialize the extension, binding it to ``app`` when one is given."""
+        if app:
+            self.init_app(app)
+
+    def init_app(self, app):
+        """Add the CLI group and store the extension in ``app.extensions``."""
+        app.cli.add_command(patents)
+        # self.init_config(app)
+        app.extensions['iroko-patents'] = self
diff --git a/iroko/patents/fixtures.py b/iroko/patents/fixtures.py
new file mode 100644
index 00000000..04ffd912
--- /dev/null
+++ b/iroko/patents/fixtures.py
@@ -0,0 +1,130 @@
+import datetime
+import os
+from typing import List
+from unicodedata import combining, normalize
+
+from pandas import DataFrame, read_csv
+
+from iroko.records.api import IrokoRecord
+from iroko.records.search import IrokoRecordSearch
+
+# Unaccented, lower-case place names that mark an affiliation as Cuban.
+_CUBAN_WORDS = (
+    'cuba', 'pinar del rio', 'artemisa', 'mayabeque', 'matanzas', 'habana',
+    'cienfuegos', 'villa clara', 'santa clara', 'santi spiritus',
+    'sancti spiritus', 'ciego de avila', 'camaguey', 'las tunas', 'bayamo',
+    'holguin', 'santiago de cuba', 'guantanamo',
+    )
+_UNIVERSITY_WORDS = ('universidad', 'university')
+_ALLOWED_EXTENSIONS = {'csv', 'json'}
+
+
+def _fold(text: str) -> str:
+    """Return ``text`` lower-cased and stripped of diacritics."""
+    decomposed = normalize('NFKD', text.lower())
+    return ''.join(ch for ch in decomposed if not combining(ch))
+
+
+def _is_cuban_affiliation(affiliation: str):
+    """Tell whether ``affiliation`` mentions Cuba or a known Cuban place.
+
+    The text is accent-folded first so that accented spellings match the
+    unaccented entries of ``_CUBAN_WORDS``.
+    """
+    af = _fold(affiliation)
+    return any(word in af for word in _CUBAN_WORDS)
+
+
+def _is_university_affiliation(affiliation: str):
+    """Tell whether ``affiliation`` contains 'universidad' or 'university'."""
+    af = _fold(affiliation)
+    return any(word in af for word in _UNIVERSITY_WORDS)
+
+
+def _creator_is_cuban(creator):
+    """Tell whether any affiliation of ``creator`` is Cuban."""
+    if 'affiliations' not in creator:
+        return False
+    return any(_is_cuban_affiliation(aff) for aff in creator['affiliations'])
+
+
+def _creator_is_author(creator):
+    """Tell whether ``creator`` lists the 'Author' role."""
+    if 'roles' not in creator:
+        return False
+    return any(role == 'Author' for role in creator['roles'])
+
+
+def get_cuban_authors_from_record(rec: IrokoRecord):
+    """Return the creators of ``rec`` that are authors with a Cuban affiliation."""
+    authors: List[dict] = []
+    if 'creators' in rec:
+        authors = [
+            creator for creator in rec['creators']
+            if _creator_is_author(creator) and _creator_is_cuban(creator)
+            ]
+    return authors
+
+
+def get_all_cubans_authors_from_records():
+    """Collect the Cuban authors of every record returned by the search scan.
+
+    :returns: ``(cubans, universities)``, two dicts keyed by author name; the
+        second one holds only the authors with a university affiliation.
+    """
+    search = IrokoRecordSearch()
+    cubans = dict()
+    universities = dict()
+    for hit in search.scan():
+        record = IrokoRecord.get_record_by_pid_value(hit.id)
+        for aut in get_cuban_authors_from_record(record):
+            if 'name' in aut and aut['name'] not in cubans:
+                cubans[aut['name']] = aut
+                for aff in aut['affiliations']:
+                    if _is_university_affiliation(aff):
+                        universities[aut['name']] = aut
+    return cubans, universities
+
+
+def _tmp_func():
+    """Debug helper: print each record that lowers the earliest date seen."""
+    search = IrokoRecordSearch()
+    last: str = '2022-12-31'
+    for hit in search.scan():
+        record = IrokoRecord.get_record_by_pid_value(hit.id)
+        cur = record['publication_date']
+        if last > cur:
+            last = cur
+            print('---------------------')
+            print('---------------------')
+            print(last)
+            print(record)
+            print('---------------------')
+            print('---------------------')
+
+
+# Helpers for file uploads
+def allowed_file(filename):
+    """Tell whether ``filename`` has a ``csv`` or ``json`` extension."""
+    return get_ext(filename) in _ALLOWED_EXTENSIONS
+
+
+def get_ext(filename):
+    """Return the lower-cased extension of ``filename`` ('' if it has none)."""
+    if '.' not in filename:
+        return ''
+    return filename.rsplit('.', 1)[1].lower()
+
+
+def csv_to_json(file):
+    """Store an uploaded CSV under ``./data`` and convert it to JSON records.
+
+    :param file: upload object exposing ``filename`` and ``save(path)``.
+    :returns: path of the generated ``.json`` file.
+    """
+    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+    csv_path = os.path.join('./data', stamp + '.' + get_ext(file.filename))
+    json_path = os.path.join('./data', stamp + '.json')
+    file.save(csv_path)
+    read_csv(csv_path).to_json(path_or_buf=json_path, orient='records')
+    return json_path
diff --git a/iroko/patents/jsonschemas/__init__.py b/iroko/patents/jsonschemas/__init__.py
new file mode 100644
index 00000000..a1a2aa68
--- /dev/null
+++ b/iroko/patents/jsonschemas/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+
+"""JSON schemas.
+
+JSON schemas are used to define the structure of the record data of your
+instance.
+
+The documentation of
+`Invenio-JSONSchemas `_
+describes their usage and configuration options.
+""" diff --git a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json new file mode 100644 index 00000000..c2fc2bfd --- /dev/null +++ b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json @@ -0,0 +1,226 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://localhost/schemas/patents/patent-v1.0.0.json", + "title": "Patent Schema", + "type": "object", + "additionalProperties": true, + "required": [ + "id", + "title" + ], + "properties": { + "id": { + "type": "string", + "description": "Iroko UUID" + }, + "identifiers": { + "type": "array", + "description": "Patent Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + }, + "title": { + "type": "string", + "description": "The title of the patent." + }, + "authors": { + "type": "array", + "description": "A list with the inventors of the patent", + "minItems": 0, + "items":{ + "type": "object", + "properties": { + "name": { + "description": "The name of the author", + "type": "string" + }, + "identifiers": { + "type": "array", + "description": "Person Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + } + } + } + }, + "affiliations": { + "type": "array", + "description": "A list with the affiliations of the patent", + "minItems": 0, + "items":{ + "type": "object", + "properties": { + "name": { + "description": "The name of the affiliation", + "type": "string" + }, + "identifiers": { + "type": "array", + "description": "Person Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": 
"string" + } + } + } + } + } + } + }, + "co_author": { + "type": "array", + "description": "A list of names of the co authors of the patent", + "minItems": 0, + "items": { + "type": "object", + "properties": { + "name":{ + "type":"string", + "description": "The name of the coauthor" + }, + "identifiers": { + "type": "array", + "description": "Person Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + } + } + } + }, + "summary": { + "type": "string", + "description": "A summary of the patent" + }, + "classification": { + "type": "string", + "description": "The classification of the patent" + }, + "claims": { + "type": "string", + "description": "An url to the file with the claims of the patent which define in technical terms why protection for the invention is requested" + }, + "prior_art": { + "type": "string", + "description": "An url to the file with the background and description of the problem" + }, + "drawing": { + "type": "string", + "description": "An url with a drawing with the design of what you want to patent " + }, + "countries": { + "type": "array", + "description": "A list of the countries where the inventor wants to patent the product", + "items": { + "type": "string" + } + }, + "country": { + "type": "object", + "description": "The country where originally the patent was presented", + "properties": { + "code": { + "type": "string", + "description": "The ISO 3166-1 alpha-2 code of the country" + }, + "name": { + "type": "string", + "description": "The name of the country" + } + } + }, + "international":{ + "type": "boolean", + "description": "If the patent was presented in all countries that belong to the PCT" + }, + "expedient_number": { + "type": "string", + "description": "The number of the expedient of the patent" + }, + "key_words": { + "type": "array", + "description": "A list of 
key words of the patent", + "items": { + "type": "string" + } + }, + "presentation_date": { + "type": "string", + "description": "The date of the presentation of the patent", + "format": "date-time" + }, + "register_number": { + "type": "string", + "description": "The number of the register of the patent" + }, + "register_date": { + "type": "string", + "description": "The date the patent was registered", + "format": "date-time" + }, + "publication_date": { + "type": "string", + "description": "The date the patent was published", + "format": "date-time" + }, + "ipc_clases": { + "type": "string", + "description": " " + }, + "subtype": { + "type": "string", + "description": "The patent application subtype" + }, + "status": { + "type": "string", + "description": "The status of the open patent. It could be to review or aproved", + "enum": [ + "APROVED", + "TO_REVIEW" + ] + }, + "legal_status": { + "type": "string", + "description": "The legal status of the patent" + } + } + } diff --git a/iroko/patents/loaders/__init__.py b/iroko/patents/loaders/__init__.py new file mode 100644 index 00000000..4611d81e --- /dev/null +++ b/iroko/patents/loaders/__init__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Loaders. + +This file contains sample loaders that can be used to deserialize input data in +an application level data structure. The marshmallow_loader() method can be +parameterized with different schemas for the record metadata. In the provided +json_v1 instance, it uses the MetadataSchemaV1, defining the +PersistentIdentifier field. 
+""" + +from __future__ import absolute_import, print_function + +from invenio_records_rest.loaders.marshmallow import marshmallow_loader + +from iroko.patents.marshmallow import PatentRecordSchemaV1 + +#: JSON loader using Marshmallow for data validation. +json_v1 = marshmallow_loader(PatentRecordSchemaV1) + +__all__ = ( + 'json_v1', +) diff --git a/iroko/patents/mappings/__init__.py b/iroko/patents/mappings/__init__.py new file mode 100644 index 00000000..3e569253 --- /dev/null +++ b/iroko/patents/mappings/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Mappings. + +Mappings define how organizations and their fields will be indexed in Elasticsearch. +The provided record-v1.0.0.json file is an example of how to index organizations +in Elasticsearch. You need to provide one mapping per major version of +Elasticsearch you want to support. +""" + +from __future__ import absolute_import, print_function diff --git a/iroko/patents/mappings/v6/__init__.py b/iroko/patents/mappings/v6/__init__.py new file mode 100644 index 00000000..c4d43bd7 --- /dev/null +++ b/iroko/patents/mappings/v6/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. 
+
+"""Mappings for Elasticsearch 6.x."""
+
+from __future__ import absolute_import, print_function
diff --git a/iroko/patents/mappings/v6/persons/patent-v1.0.0.json b/iroko/patents/mappings/v6/persons/patent-v1.0.0.json
new file mode 100644
index 00000000..a8e600b1
--- /dev/null
+++ b/iroko/patents/mappings/v6/persons/patent-v1.0.0.json
@@ -0,0 +1,147 @@
+{
+  "mappings":{
+    "patent-v1.0.0": {
+      "date_detection": false,
+      "numeric_detection": false,
+      "properties": {
+        "$schema": {
+          "type": "text",
+          "index": false
+        },
+        "id": {
+          "type": "keyword"
+        },
+        "identifiers": {
+          "type": "object",
+          "properties": {
+            "idtype": {
+              "type": "keyword"
+            },
+            "value": {
+              "type": "keyword"
+            }
+          }
+        },
+        "title": {
+          "type": "keyword"
+        },
+        "authors": {
+          "type": "object",
+          "properties":{
+            "identifiers": {
+              "type": "object",
+              "properties": {
+                "idtype": {
+                  "type": "keyword"
+                },
+                "value": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "name": {"type": "keyword"}
+          }
+        },
+        "affiliations": {
+          "type": "object",
+          "properties":{
+            "identifiers": {
+              "type": "object",
+              "properties": {
+                "idtype": {
+                  "type": "keyword"
+                },
+                "value": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "name": {"type": "keyword"}
+          }
+        },
+        "co_author": {
+          "type": "object",
+          "properties": {
+            "identifiers": {
+              "type": "object",
+              "properties": {
+                "idtype": {
+                  "type": "keyword"
+                },
+                "value": {
+                  "type": "keyword"
+                }
+              }
+            },
+            "name": {"type": "keyword"}
+          }
+        },
+        "summary": {
+          "type": "keyword"
+        },
+        "classification": {
+          "type": "keyword"
+        },
+        "claims": {
+          "type": "keyword"
+        },
+        "prior_art": {
+          "type": "keyword"
+        },
+        "drawing": {
+          "type": "keyword"
+        },
+        "countries": {
+          "type": "keyword"
+        },
+        "country": {
+          "type": "object",
+          "properties": {
+            "code": {
+              "type": "keyword"
+            },
+            "name": {
+              "type": "keyword"
+            }
+          }
+        },
+        "international": {
+          "type": "keyword"
+        },
+        "expedient_number": {
+          "type": "keyword"
+        },
+        "key_words": {
+          "type": "keyword"
+        },
+        "presentation_date": {
+          "type": "date",
+          "format": "date"
+        },
+        "register_number": {
+          "type": "keyword"
+        },
+        "register_date": {
+          "type": "date",
+          "format": "date"
+        },
+        "publication_date": {
+          "type": "date",
+          "format": "date"
+        },
+        "ipc_clases": {
+          "type": "keyword"
+        },
+        "subtype": {
+          "type": "keyword"
+        },
+        "status": {
+          "type": "keyword"
+        },
+        "legal_status": {
+          "type": "keyword"
+        }
+      }
+    }
+  }
+}
diff --git a/iroko/patents/marshmallow/__init__.py b/iroko/patents/marshmallow/__init__.py
new file mode 100644
index 00000000..72fa19d5
--- /dev/null
+++ b/iroko/patents/marshmallow/__init__.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Schemas for marshmallow."""
+
+from __future__ import absolute_import, print_function
+
+from .json import PatentRecordSchemaV1
+
+__all__ = (
+    'PatentRecordSchemaV1',
+)
diff --git a/iroko/patents/marshmallow/json.py b/iroko/patents/marshmallow/json.py
new file mode 100644
index 00000000..fd0e8aae
--- /dev/null
+++ b/iroko/patents/marshmallow/json.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""JSON Schemas."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_jsonschemas import current_jsonschemas
+from invenio_records_rest.schemas import Nested, StrictKeysMixin
+from invenio_records_rest.schemas.fields import (
+    DateString, GenFunction,
+    PersistentIdentifier, SanitizedUnicode,
+    )
+from marshmallow import INCLUDE, fields, missing, validate
+
+allow_empty = validate.Length(min=0)
+
+
+def bucket_from_context(_, context):
+    """Get the record's bucket from context."""
+    record = (context or {}).get('record', {})
+    return record.get('_bucket', missing)
+
+
+def files_from_context(_, context):
+    """Get the record's files from context."""
+    record = (context or {}).get('record', {})
+    return record.get('_files', missing)
+
+
+def schema_from_context(_, context):
+    """Get the record's schema from context.
+
+    Falls back to the URL of the ``PatentRecord`` JSON schema.
+    """
+    record = (context or {}).get('record', {})
+    if "_schema" in record:
+        return record["_schema"]
+    # Imported here: the name was never imported at module level, and a
+    # local import keeps this module free of an import-time dependency on
+    # the API module.
+    from iroko.patents.api import PatentRecord
+    return current_jsonschemas.path_to_url(PatentRecord._schema)
+
+
+class IdentifierSchemaV1(StrictKeysMixin):
+    """Ids schema."""
+
+    idtype = SanitizedUnicode()
+    value = SanitizedUnicode()
+
+
+class CountrySchemaV1(StrictKeysMixin):
+    """Country given by name and code."""
+
+    name = SanitizedUnicode()
+    code = SanitizedUnicode()
+
+
+class AffiliationsSchemaV1(StrictKeysMixin):
+    """Affiliation of a patent: id, identifiers and name."""
+
+    id = SanitizedUnicode()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    name = SanitizedUnicode()
+
+
+class PersonSchemaV1(StrictKeysMixin):
+    """Author or co-author of a patent: id, identifiers and name."""
+
+    id = SanitizedUnicode()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    name = SanitizedUnicode()
+
+
+class PatentMetadataSchemaV1(StrictKeysMixin):
+    """Schema for the record metadata."""
+
+    id = PersistentIdentifier()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
+    authors = Nested(PersonSchemaV1, many=True)
+    affiliations = Nested(AffiliationsSchemaV1, many=True)
+    co_author = Nested(PersonSchemaV1, many=True)
+    summary = SanitizedUnicode()
+    classification = SanitizedUnicode()
+    claims = SanitizedUnicode()
+    prior_art = SanitizedUnicode()
+    drawing = SanitizedUnicode()
+    # fields.List is already a list; it has no ``many`` option.
+    countries = fields.List(SanitizedUnicode())
+    country = Nested(CountrySchemaV1, many=False)
+    international = fields.Bool()
+    expedient_number = SanitizedUnicode()
+    key_words = fields.List(SanitizedUnicode())
+    presentation_date = DateString()
+    register_number = SanitizedUnicode()
+    register_date = DateString()
+    publication_date = DateString()
+    ipc_clases = SanitizedUnicode()
+    subtype = SanitizedUnicode()
+    status = SanitizedUnicode()
+    legal_status = SanitizedUnicode()
+    _schema = GenFunction(
+        attribute="$schema",
+        data_key="$schema",
+        deserialize=schema_from_context,  # to be added only when loading
+        )
+
+
+class PatentRecordSchemaV1(StrictKeysMixin):
+    """Record schema."""
+
+    metadata = fields.Nested(PatentMetadataSchemaV1)
+    created = fields.Str(dump_only=True)
+    revision = fields.Integer(dump_only=True)
+    updated = fields.Str(dump_only=True)
+    links = fields.Dict(dump_only=True)
+    id = PersistentIdentifier()
+    files = GenFunction(
+        serialize=files_from_context, deserialize=files_from_context)
+
+
+patentMetadataSchema = PatentMetadataSchemaV1(many=False, unknown=INCLUDE)
diff --git a/iroko/patents/permissions.py b/iroko/patents/permissions.py
new file mode 100644
index 00000000..072331c1
--- /dev/null
+++ b/iroko/patents/permissions.py
@@ -0,0 +1,27 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Permissions for Iroko."""
+from flask_login import current_user
+from flask_principal import RoleNeed
+from invenio_access import Permission
+
+curator_permission = Permission(RoleNeed('curator'))
+
+def can_edit_patent_factory(record, *args, **kwargs):
+    """Return a check that lets only logged-in curators edit patent items.
+    """
+    def can(self):
+        # ``record`` is not inspected: the rule depends on the user only.
+        authenticated = current_user.is_authenticated
+        return bool(authenticated and curator_permission.can())
+    return type('Check', (), {'can': can})()
diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py
new file mode 100644
index 00000000..a4592b3c
--- /dev/null
+++ b/iroko/patents/rest.py
@@ -0,0 +1,76 @@
+"""REST endpoints for patents."""
+
+from __future__ import absolute_import, print_function
+
+import datetime
+import os
+
+from flask import Blueprint, flash, jsonify, make_response, request
+
+from iroko.patents.api import PatentRecord
+from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext
+from iroko.patents.serializers import json_v1_response
+from iroko.pidstore import pids
+
+api_blueprint = Blueprint(
+    'iroko_api_patents',
+    __name__,
+    url_prefix='/patents'
+    )
+
+
+@api_blueprint.route('/pid', methods=['GET'])
+def get_patent_by_pid_canonical():
+    """Return the patent matching the PID given in the ``value`` query arg.
+
+    Any PID (including the UUID) is accepted. When no patent is found, or
+    the lookup fails, a JSON body with an ``ERROR`` key is returned.
+    """
+    _id = request.args.get('value')
+    try:
+        pid, patent = PatentRecord.get_record_by_pid(pids.PATENT_PID_TYPE, _id)
+        if not pid or not patent:
+            raise Exception('')
+
+        return json_v1_response(pid, patent)
+
+    except Exception:
+        return jsonify({
+            'ERROR': 'no pid found: {0}'.format(_id)
+            })
+
+
+@api_blueprint.route('/import/<per_uuid>', methods=['POST'])
+# @require_api_auth()
+def upload_file(per_uuid):
+    """Import patents from an uploaded ``.csv`` or ``.json`` file.
+
+    The upload is read from the ``file`` form field and stored under
+    ``./data`` with a timestamp name (CSV uploads are converted to JSON
+    first); the resulting JSON file is then loaded as patents.
+
+    :param per_uuid: URL segment; it is not forwarded to the loader, which
+        only needs the file path.
+    :raises Exception: when the ``file`` part is missing or unnamed, or its
+        extension is neither ``csv`` nor ``json``.
+    """
+    if 'file' not in request.files:
+        flash('No file part')
+        raise Exception("No file part")
+    file = request.files['file']
+    # If the user does not select a file, the browser submits an
+    # empty file without a filename.
+    if file.filename == '':
+        flash('No selected file')
+        raise Exception("Not file in request")
+    if not (file and allowed_file(file.filename)):
+        raise Exception("no valid file extension")
+
+    if get_ext(file.filename) == 'csv':
+        json_path = csv_to_json(file)
+    else:
+        stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+        json_path = os.path.join('./data', stamp + '.json')
+        file.save(json_path)
+    PatentRecord.load_from_json_file(json_path)
+    return make_response(jsonify({'msg': 'success'})), 201
diff --git a/iroko/patents/search.py b/iroko/patents/search.py
new file mode 100644
index 00000000..298096e4
--- /dev/null
+++ b/iroko/patents/search.py
@@ -0,0 +1,19 @@
+
+"""Patent search APIs."""
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+from invenio_search import RecordsSearch
+
+
+class PatentsSearch(RecordsSearch):
+    """RecordsSearch for patents."""
+
+    class Meta:
+        """Search only on the ``patents`` index."""
+
+        index = "patents"
+        doc_types = None
diff --git a/iroko/patents/serializers/__init__.py b/iroko/patents/serializers/__init__.py
new file mode 100644
index 00000000..a664f724
--- /dev/null
+++ b/iroko/patents/serializers/__init__.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022.
Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Record serializers.""" + +from __future__ import absolute_import, print_function + +from invenio_records_rest.serializers.json import JSONSerializer +from invenio_records_rest.serializers.response import ( + record_responsify, + search_responsify, + ) + +# Serializers +# =========== +#: JSON serializer definition. +from iroko.patents.marshmallow.json import PatentRecordSchemaV1 + +json_v1 = JSONSerializer(PatentRecordSchemaV1, replace_refs=True) + +# Records-REST serializers +# ======================== +#: JSON record serializer for individual organizations. +json_v1_response = record_responsify(json_v1, 'application/json') +#: JSON record serializer for search results. 
+json_v1_search = search_responsify(json_v1, 'application/json') + +__all__ = ( + 'json_v1', + 'json_v1_response', + 'json_v1_search', +) diff --git a/iroko/patents/utils.py b/iroko/patents/utils.py new file mode 100644 index 00000000..61efa26e --- /dev/null +++ b/iroko/patents/utils.py @@ -0,0 +1,65 @@ + + +from lxml import etree + +from iroko.records import ContributorRole + + +def get_people_from_nlm(metadata: etree._Element): + """get a PersonRecord from {http://dtd.nlm.nih.gov/publishing/2.3}contrib + etree._Element + return creators, contribs dics, """ + + xmlns = '{http://dtd.nlm.nih.gov/publishing/2.3}' + contribs_xml = metadata.findall('.//' + xmlns + 'contrib') + + contributors = {} + + for contrib in contribs_xml: + person = dict() + + surname = contrib.find(xmlns + 'name/' + xmlns + 'surname') + given_names = contrib.find(xmlns + 'name/' + xmlns + 'given-names') + aff = contrib.find(xmlns + 'aff') + email = contrib.find(xmlns + 'email') + if given_names is None and surname is None: + # FIXME if a person dont have surname or given name, then is not a person.... + # even if there is an email? 
+ continue + else: + name = "" + if given_names is not None and given_names.text is not None: + name += given_names.text + if surname is not None and surname.text is not None: + name += ' ' + surname.text + person['name'] = name + if aff is not None: + person['affiliations'] = [] + person['affiliations'].append(aff.text) + if email is not None: + person['email'] = email.text + person['roles'] = [] + if 'corresp' in contrib.attrib: + if contrib.attrib['corresp'] == "yes": + person['roles'].append(ContributorRole.ContactPerson.value) + if 'contrib-type' in contrib.attrib: + ctype = contrib.attrib['contrib-type'] + if ctype == "author": + person['roles'].append(ContributorRole.Author.value) + if ctype == "editor": + person['roles'].append(ContributorRole.Editor.value) + if ctype == "jmanager": + person['roles'].append(ContributorRole.JournalManager.value) + if person['name'] in contributors.keys(): + contributors[person['name']]['roles'].extend(person['roles']) + else: + contributors[person['name']] = person + creators = [] + contribs = [] + for name in contributors: + person = contributors[name] + if ContributorRole.Author.value in person['roles']: + creators.append(person) + else: + contribs.append(person) + return creators, contribs diff --git a/iroko/pidstore/pids.py b/iroko/pidstore/pids.py index 6652991a..fde5eee1 100644 --- a/iroko/pidstore/pids.py +++ b/iroko/pidstore/pids.py @@ -35,6 +35,10 @@ PERSON_PID_MINTER = "perid" PERSON_PID_FETCHER = "perid" +PATENT_PID_TYPE = "patid" +PATENT_PID_MINTER = "patid" +PATENT_PID_FETCHER = "patid" + IROKO_OBJECT_TYPE = "rec" IROKO_UUID_FIELD = "id" @@ -42,7 +46,8 @@ 'irouid', 'srcid', 'orgid', - 'perid' + 'perid', + 'patid' ] def get_pid_by_data(data): From 72d904ac781de6c960a8de8ccabeb5d01e56e873 Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Tue, 3 Oct 2023 19:27:05 +0200 Subject: [PATCH 2/7] Web scraping to OCPI --- iroko/config.py | 45 +++++++++++++++++- iroko/patents/api.py | 27 ++++++++--- iroko/patents/cli.py | 6 
+-- iroko/patents/importaciones/gp.py | 11 +++++ iroko/patents/importaciones/ocpi.py | 47 +++++++++++++++++++ .../jsonschemas/patents/patent-v1.0.0.json | 10 +--- .../{persons => patents}/patent-v1.0.0.json | 3 -- iroko/patents/marshmallow/json.py | 1 - iroko/patents/rest.py | 2 +- 9 files changed, 128 insertions(+), 24 deletions(-) create mode 100644 iroko/patents/importaciones/gp.py create mode 100644 iroko/patents/importaciones/ocpi.py rename iroko/patents/mappings/v6/{persons => patents}/patent-v1.0.0.json (98%) diff --git a/iroko/config.py b/iroko/config.py index a7373192..b9c411cc 100755 --- a/iroko/config.py +++ b/iroko/config.py @@ -28,13 +28,17 @@ from iroko.organizations.api import OrganizationRecord from iroko.organizations.permissions import can_edit_organization_factory from iroko.organizations.search import OrganizationSearch +from iroko.patents.api import PatentRecord +from iroko.patents.search import PatentsSearch +from iroko.patents.permissions import can_edit_patent_factory from iroko.persons.api import PersonRecord from iroko.persons.permissions import can_edit_person_factory from iroko.persons.search import PersonsSearch from iroko.pidstore import pids as pids from iroko.pidstore.pids import ( ORGANIZATION_PID_FETCHER, ORGANIZATION_PID_MINTER, - ORGANIZATION_PID_TYPE, PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE, + ORGANIZATION_PID_TYPE, PATENT_PID_FETCHER, PATENT_PID_MINTER, PATENT_PID_TYPE, + PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE, ) from iroko.records.api import IrokoRecord from iroko.records.search import IrokoRecordSearch @@ -122,6 +126,10 @@ def _(x): _ORG_CONVERTER = ( 'pid(orgid, record_class="iroko.organizations.api.OrganizationRecord")' ) +_PATENT_CONVERTER = ( + 'pid(patid, record_class="iroko.patents.api.PatentRecord")' +) + _PERSON_CONVERTER = ( 'pid(perid, record_class="iroko.persons.api.PersonRecord")' ) @@ -224,6 +232,37 @@ def _(x): 'delete_permission_factory_imp': can_edit_organization_factory, 
'list_permission_factory_imp': allow_all }, + 'patid': { + 'pid_type': PATENT_PID_TYPE, + 'pid_minter': PATENT_PID_MINTER, + 'pid_fetcher': PATENT_PID_FETCHER, + 'default_endpoint_prefix': True, + 'record_class': PatentRecord, + 'search_class': PatentsSearch, + 'indexer_class': RecordIndexer, + 'record_serializers': { + 'application/json': ('iroko.patents.serializers' + ':json_v1_response'), + }, + 'search_serializers': { + 'application/json': ('iroko.patents.serializers' + ':json_v1_search'), + }, + 'record_loaders': { + 'application/json': ('iroko.patents.loaders' + ':json_v1'), + }, + 'list_route': '/search/patents/', + 'item_route': '/pid/patent/<{0}:pid_value>'.format(_PATENT_CONVERTER), + 'default_media_type': 'application/json', + 'max_result_window': 10000, + 'error_handlers': {}, + 'create_permission_factory_imp': can_edit_patent_factory, + 'read_permission_factory_imp': check_elasticsearch, + 'update_permission_factory_imp': can_edit_patent_factory, + 'delete_permission_factory_imp': can_edit_patent_factory, + 'list_permission_factory_imp': allow_all + }, 'perid': { 'pid_type': PERSON_PID_TYPE, 'pid_minter': PERSON_PID_MINTER, @@ -462,6 +501,10 @@ def _(x): 'query': 'bestmatch', 'noquery': 'bestmatch', }, + 'patents': { + 'query': 'bestmatch', + 'noquery': 'bestmatch', + }, 'persons': { 'query': 'bestmatch', 'noquery': 'bestmatch', diff --git a/iroko/patents/api.py b/iroko/patents/api.py index c7619939..bd2c6fec 100755 --- a/iroko/patents/api.py +++ b/iroko/patents/api.py @@ -54,11 +54,29 @@ def load_from_json_file(cls, file_path): @classmethod def get_pat_by_pid(cls, pid_value, with_deleted=False): - return cls.get_record_by_pid_value(pid_value) + resolver = Resolver( + pid_type=PATENT_PID_TYPE, + object_type=IROKO_OBJECT_TYPE, + getter=cls.get_record, + ) + try: + return resolver.resolve(str(pid_value)) + except Exception: + pass + + for pid_type in identifiers_schemas: + try: + resolver.pid_type = pid_type + schemapid, pat = resolver.resolve(pid_value) 
+ pid = PersistentIdentifier.get(PATENT_PID_TYPE, pat['id']) + return pid, pat + except Exception as e: + pass + return None, None @classmethod def create_or_update(cls, pat_uuid, data, **kwargs): - """Create or update OrganizationRecord.""" + """Create or update PatentRecord.""" # assert pat_uuid pat, msg = cls.resolve_and_update(pat_uuid, data) @@ -74,7 +92,7 @@ def create_or_update(cls, pat_uuid, data, **kwargs): @classmethod def delete(cls, data, vendor=None, delindex=True, force=False): - """Delete a IrokoRecord record.""" + """Delete an IrokoRecord record.""" assert data.get(cls.pid_uuid_field) pid = data.get(cls.pid_uuid_field) record = cls.get_record_by_pid_value(pid) @@ -87,9 +105,6 @@ def delete(cls, data, vendor=None, delindex=True, force=False): pass return result - - - def fixture_spi_fields(person: PersonRecord, org: OrganizationRecord): """hard code fixtures of spi data, coming from human resources of cuban institutions """ country_code = 'cu' diff --git a/iroko/patents/cli.py b/iroko/patents/cli.py index a2c0e21f..0d8862ce 100644 --- a/iroko/patents/cli.py +++ b/iroko/patents/cli.py @@ -17,11 +17,11 @@ def patents(): @patents.command() -@click.argument('perid') +@click.argument('patid') @with_appcontext -def import_from_file(perid): +def import_from_file(patid): """Load from specific file en data/patents/patents.json""" datadir = current_app.config['IROKO_DATA_DIRECTORY'] file_path = os.path.join(datadir, 'patents', 'patents.json') - PatentRecord.load_from_json_file(file_path, perid) + PatentRecord.load_from_json_file(file_path, patid) diff --git a/iroko/patents/importaciones/gp.py b/iroko/patents/importaciones/gp.py new file mode 100644 index 00000000..f6ea44f7 --- /dev/null +++ b/iroko/patents/importaciones/gp.py @@ -0,0 +1,11 @@ +import requests +import wget +from time import sleep +import bs4 as bs + +url = 'https://patents.google.com/?type=PATENT&oq=type:PATENT' +result = requests.get(url) +content = result.text + +soup = 
bs.BeautifulSoup(content, 'html.parser') +print(soup) diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py new file mode 100644 index 00000000..6d609df0 --- /dev/null +++ b/iroko/patents/importaciones/ocpi.py @@ -0,0 +1,47 @@ +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. +# + +import json +import requests +import bs4 as bs +from lxml import html + +website = 'https://wiposearch.ocpi.cu/wopublish-search/public/patents' +resultado = requests.get(website) +content = resultado.text + +soup = bs.BeautifulSoup(content, 'html.parser') +rows = soup.find('table', {'class': 'table table-view COLUMN'}).find('tbody').find_all('tr') +a = soup.find('table', {'title' : 'Go to next page'}) +print(a) +patent = { + "identifiers": "", + "title": "", + "publication_date": "", + "affiliations": [], + "authors": [], + "subtype": "", + "legal_status": "" +} + +for row in rows: + patent["identifiers"] = str.rstrip(row.find_all('td')[2].get_text()) + patent["title"] = row.find_all('td')[1].get_text().rstrip() + patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() + patent["affiliations"] = row.find_all('td')[8].get_text().rstrip() + patent["authors"] = row.find_all('td')[9].get_text().rstrip() + patent["subtype"] = row.find_all('td')[10].get_text().rstrip() + patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() + json_patents = json.dumps(patent) + break + + + + + + + + diff --git a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json index c2fc2bfd..c6905a0c 100644 --- a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json +++ b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json @@ -6,7 +6,7 @@ "additionalProperties": true, "required": [ "id", - "title" + "identifiers" ], 
"properties": { "id": { @@ -210,14 +210,6 @@ "type": "string", "description": "The patent application subtype" }, - "status": { - "type": "string", - "description": "The status of the open patent. It could be to review or aproved", - "enum": [ - "APROVED", - "TO_REVIEW" - ] - }, "legal_status": { "type": "string", "description": "The legal status of the patent" diff --git a/iroko/patents/mappings/v6/persons/patent-v1.0.0.json b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json similarity index 98% rename from iroko/patents/mappings/v6/persons/patent-v1.0.0.json rename to iroko/patents/mappings/v6/patents/patent-v1.0.0.json index a8e600b1..1c722d69 100644 --- a/iroko/patents/mappings/v6/persons/patent-v1.0.0.json +++ b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json @@ -135,9 +135,6 @@ "subtype": { "type": "keyword" }, - "status": { - "type": "keyword" - }, "legal_status": { "type": "keyword" } diff --git a/iroko/patents/marshmallow/json.py b/iroko/patents/marshmallow/json.py index fd0e8aae..65ec5c2d 100644 --- a/iroko/patents/marshmallow/json.py +++ b/iroko/patents/marshmallow/json.py @@ -94,7 +94,6 @@ class PatentMetadataSchemaV1(StrictKeysMixin): publication_date = DateString() ipc_clases = SanitizedUnicode() subtype = SanitizedUnicode() - status = SanitizedUnicode() legal_status = SanitizedUnicode() _schema = GenFunction( attribute="$schema", diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index a4592b3c..4fa549f2 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -29,7 +29,7 @@ def get_patent_by_pid_canonical(): try: _id = request.args.get('value') print("**********************", _id) - pid, patent = PatentRecord.get_record_by_pid(pids.PATENT_PID_TYPE, _id) + pid, patent = PatentRecord.get_pat_by_pid(pids.PATENT_PID_TYPE, _id) if not pid or not patent: raise Exception('') From b1e7ba434ee6a822b91e70b9ff1a2ce1c66b8622 Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Wed, 25 Oct 2023 21:29:49 +0200 Subject: [PATCH 3/7] Fixing 
some errors --- .../importaciones/google_patents/gp.py | 7 ++ iroko/patents/importaciones/gp.py | 11 --- iroko/patents/importaciones/ocpi.py | 69 +++++++++++++------ .../jsonschemas/patents/patent-v1.0.0.json | 62 +++-------------- .../mappings/v6/patents/patent-v1.0.0.json | 42 +++-------- iroko/patents/marshmallow/json.py | 12 ++-- iroko/patents/rest.py | 5 -- iroko/pidstore/fetchers.py | 7 ++ iroko/pidstore/minters.py | 11 ++- iroko/pidstore/providers.py | 37 ++++++++++ pyproject.toml | 10 +++ run | 2 +- setup.py | 8 +++ 13 files changed, 152 insertions(+), 131 deletions(-) create mode 100644 iroko/patents/importaciones/google_patents/gp.py delete mode 100644 iroko/patents/importaciones/gp.py diff --git a/iroko/patents/importaciones/google_patents/gp.py b/iroko/patents/importaciones/google_patents/gp.py new file mode 100644 index 00000000..51631ee1 --- /dev/null +++ b/iroko/patents/importaciones/google_patents/gp.py @@ -0,0 +1,7 @@ +import requests +import wget +from time import sleep +from requests_html import HTMLSession +import bs4 as bs + + diff --git a/iroko/patents/importaciones/gp.py b/iroko/patents/importaciones/gp.py deleted file mode 100644 index f6ea44f7..00000000 --- a/iroko/patents/importaciones/gp.py +++ /dev/null @@ -1,11 +0,0 @@ -import requests -import wget -from time import sleep -import bs4 as bs - -url = 'https://patents.google.com/?type=PATENT&oq=type:PATENT' -result = requests.get(url) -content = result.text - -soup = bs.BeautifulSoup(content, 'html.parser') -print(soup) diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py index 6d609df0..8ce936d0 100644 --- a/iroko/patents/importaciones/ocpi.py +++ b/iroko/patents/importaciones/ocpi.py @@ -10,33 +10,62 @@ from lxml import html website = 'https://wiposearch.ocpi.cu/wopublish-search/public/patents' -resultado = requests.get(website) -content = resultado.text -soup = bs.BeautifulSoup(content, 'html.parser') -rows = soup.find('table', {'class': 'table 
table-view COLUMN'}).find('tbody').find_all('tr') -a = soup.find('table', {'title' : 'Go to next page'}) -print(a) patent = { "identifiers": "", "title": "", - "publication_date": "", - "affiliations": [], "authors": [], - "subtype": "", - "legal_status": "" + "affiliations": [], + "country": "", + "language": "", + "creation_date": "", + "grant_date": "", + "publication_date": "", + "legal_status": "", } -for row in rows: - patent["identifiers"] = str.rstrip(row.find_all('td')[2].get_text()) - patent["title"] = row.find_all('td')[1].get_text().rstrip() - patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() - patent["affiliations"] = row.find_all('td')[8].get_text().rstrip() - patent["authors"] = row.find_all('td')[9].get_text().rstrip() - patent["subtype"] = row.find_all('td')[10].get_text().rstrip() - patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() - json_patents = json.dumps(patent) - break +def getData(url): + resultado = requests.get(url) + content = resultado.text + soup = bs.BeautifulSoup(content, 'html.parser') + rows = soup.find('table', {'class': 'table table-view COLUMN'}).find('tbody').find_all('tr') + for row in rows: + patent["identifiers"] = row.find_all('td')[2].get_text().rstrip() + patent["title"] = row.find_all('td')[1].get_text().rstrip() + patent["authors"] = row.find_all('td')[9].get_text().rstrip() + patent["affiliations"] = row.find_all('td')[8].get_text().rstrip() + patent["country"] = "Cuba" + patent["language"] = "spanish" + patent["creation_date"] = row.find_all('td')[3].get_text().rstrip() + patent["grant_date"] = row.find_all('td')[5].get_text().rstrip() + patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() + patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() + json_patent = json.dumps(patent) + print(json_patent) + return soup + +def nextPage(soup): + a = soup.find(attrs= {'id': 'id14'}) + url = a['href'] + return url + +def pagination(url): + haySiguiente = 
True + while(haySiguiente): + try: + soup = getData(url) + url_siguiente = nextPage(soup) + url = url_siguiente + except: + haySiguiente = False + + return 'Ok' + +print(pagination(website)) + + + + diff --git a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json index c6905a0c..67c7e510 100644 --- a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json +++ b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json @@ -11,7 +11,7 @@ "properties": { "id": { "type": "string", - "description": "Iroko UUID" + "description": "Iroko UUID, pid_type = patid" }, "identifiers": { "type": "array", @@ -96,37 +96,6 @@ } } }, - "co_author": { - "type": "array", - "description": "A list of names of the co authors of the patent", - "minItems": 0, - "items": { - "type": "object", - "properties": { - "name":{ - "type":"string", - "description": "The name of the coauthor" - }, - "identifiers": { - "type": "array", - "description": "Person Identifiers", - "items": { - "type": "object", - "additionalProperties": false, - "properties": { - "idtype": { - "description": "identifier type", - "type": "string" - }, - "value": { - "type": "string" - } - } - } - } - } - } - }, "summary": { "type": "string", "description": "A summary of the patent" @@ -168,13 +137,9 @@ } } }, - "international":{ - "type": "boolean", - "description": "If the patent was presented in all countries that belong to the PCT" - }, - "expedient_number": { + "language": { "type": "string", - "description": "The number of the expedient of the patent" + "description": "The language of the patent" }, "key_words": { "type": "array", @@ -183,18 +148,14 @@ "type": "string" } }, - "presentation_date": { + "creation_date": { "type": "string", - "description": "The date of the presentation of the patent", + "description": "The date the patent was created", "format": "date-time" }, - "register_number": { - "type": "string", - "description": "The number of the register of the 
patent" - }, - "register_date": { + "grant_date": { "type": "string", - "description": "The date the patent was registered", + "description": "The date the patent was granted", "format": "date-time" }, "publication_date": { @@ -202,13 +163,10 @@ "description": "The date the patent was published", "format": "date-time" }, - "ipc_clases": { - "type": "string", - "description": " " - }, - "subtype": { + "link": { "type": "string", - "description": "The patent application subtype" + "description": "The link of the patent in Google patents", + "format": "date-time" }, "legal_status": { "type": "string", diff --git a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json index 1c722d69..010122de 100644 --- a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json +++ b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json @@ -39,7 +39,9 @@ } } }, - "name": "keyword" + "name": { + "type": "keyword" + } } }, "affiliations": { @@ -56,24 +58,9 @@ } } }, - "name": "keyword" - } - }, - "co_author": { - "type": "object", - "properties": { - "identifiers": { - "type": "object", - "properties": { - "idtype": { - "type": "keyword" - }, - "value": { - "type": "keyword" - } - } - }, - "name":"keyword" + "name": { + "type": "keyword" + } } }, "summary": { @@ -105,23 +92,17 @@ } } }, - "international": { - "type": "keyword" - }, - "expedient_number": { + "language": { "type": "keyword" }, "key_words": { "type": "keyword" }, - "presentation_date": { + "creation_date": { "type": "date", "format": "date" }, - "register_number": { - "type": "keyword" - }, - "register_date": { + "grant_date": { "type": "date", "format": "date" }, @@ -129,10 +110,7 @@ "type": "date", "format": "date" }, - "ipc_clases": { - "type": "keyword" - }, - "subtype": { + "link": { "type": "keyword" }, "legal_status": { diff --git a/iroko/patents/marshmallow/json.py b/iroko/patents/marshmallow/json.py index 65ec5c2d..c49f0c81 100644 --- 
a/iroko/patents/marshmallow/json.py +++ b/iroko/patents/marshmallow/json.py @@ -77,7 +77,6 @@ class PatentMetadataSchemaV1(StrictKeysMixin): title = SanitizedUnicode(required=True, validate=validate.Length(min=3)) authors = Nested(PersonSchemaV1, many=True) affiliations = Nested(AffiliationsSchemaV1, many=True) - co_author = Nested(PersonSchemaV1, many=True) summary = SanitizedUnicode() classification = SanitizedUnicode() claims = SanitizedUnicode() @@ -85,15 +84,12 @@ class PatentMetadataSchemaV1(StrictKeysMixin): drawing = SanitizedUnicode() countries = fields.List(SanitizedUnicode(), many=True) country = Nested(CountrySchemaV1, many=False) - international = fields.Bool() - expedient_number = SanitizedUnicode() + language = SanitizedUnicode() key_words = fields.List(SanitizedUnicode(), many=True) - presentation_date = DateString() - register_number = SanitizedUnicode() - register_date = DateString() + creation_date = DateString() + grant_date = DateString() publication_date = DateString() - ipc_clases = SanitizedUnicode() - subtype = SanitizedUnicode() + link = SanitizedUnicode() legal_status = SanitizedUnicode() _schema = GenFunction( attribute="$schema", diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index 4fa549f2..3b6b4968 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -40,11 +40,6 @@ def get_patent_by_pid_canonical(): 'ERROR': 'no pid found'.format(_id) }) - - - - - @api_blueprint.route('/import/', methods=['POST']) # @require_api_auth() def upload_file(per_uuid): diff --git a/iroko/pidstore/fetchers.py b/iroko/pidstore/fetchers.py index 57dfdcc5..043b76e8 100755 --- a/iroko/pidstore/fetchers.py +++ b/iroko/pidstore/fetchers.py @@ -73,6 +73,13 @@ def person_uuid_fetcher(per_uuid, data): pid_value=str(data[pids.IROKO_UUID_FIELD]), ) +def patent_uuid_fetcher(per_uuid, data): + return FetchedPID( + provider=providers.PatentUUIDProvider, + pid_type=providers.PatentUUIDProvider.pid_type, + pid_value=str(data[pids.IROKO_UUID_FIELD]), 
+ ) + def identifiers_fetcher(record_uuid, data, pid_type): assert data, "no data" diff --git a/iroko/pidstore/minters.py b/iroko/pidstore/minters.py index 0c3ebbd8..822acd95 100755 --- a/iroko/pidstore/minters.py +++ b/iroko/pidstore/minters.py @@ -74,14 +74,21 @@ def organization_uuid_minter(org_uuid, data): return provider.pid -def person_uuid_minter(org_uuid, data): +def person_uuid_minter(person_uuid, data): provider = providers.PersonUUIDProvider.create( object_type=pids.IROKO_OBJECT_TYPE, - object_uuid=org_uuid, + object_uuid=person_uuid, data=data ) return provider.pid +def patent_uuid_minter(patent_uuid, data): + provider = providers.PatentUUIDProvider.create( + object_type=pids.IROKO_OBJECT_TYPE, + object_uuid=patent_uuid, + data=data + ) + return provider.pid def identifiers_minter(uuid, data, object_type): prsIDs = providers.IdentifiersProvider.create_identifiers( diff --git a/iroko/pidstore/providers.py b/iroko/pidstore/providers.py index 7fd1fecf..e6ccadad 100755 --- a/iroko/pidstore/providers.py +++ b/iroko/pidstore/providers.py @@ -265,6 +265,43 @@ def create(cls, pid_type=None, pid_value=None, object_type=None, ) +class PatentUUIDProvider(BaseProvider): + """Document identifier provider.""" + + pid_type = pids.PATENT_PID_TYPE + """Type of persistent identifier.""" + + pid_provider = None + """Provider name. + The provider name is not recorded in the PID since the provider does not + provide any additional features besides creation of record ids. + """ + + default_status = PIDStatus.REGISTERED + """Record IDs are by default registered immediately. 
+ Default: :attr:`invenio_pidstore.models.PIDStatus.REGISTERED` + """ + + object_type = pids.IROKO_OBJECT_TYPE, + + @classmethod + def create(cls, pid_type=None, pid_value=None, object_type=None, + object_uuid=None, **kwargs): + """Create a new record identifier from the depoist PID value.""" + pid_type = pid_type or cls.pid_type + pid_value = pid_value or uuid.uuid4() + object_type = object_type or cls.object_type + object_uuid = object_uuid or uuid.uuid4() + kwargs.setdefault('status', cls.default_status) + return super(PatentUUIDProvider, cls).create( + pid_type=pid_type, + pid_value=pid_value, + object_type=object_type, + object_uuid=object_uuid, + **kwargs + ) + + class IdentifiersProvider(BaseProvider): default_status = PIDStatus.REGISTERED diff --git a/pyproject.toml b/pyproject.toml index 0c35b9c4..cb7e8e3a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,6 +105,7 @@ iroko_harvester = "iroko.harvester.ext:IrokoHarvester" invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles" iroko_organizations = "iroko.organizations.ext:IrokoOrganizations" iroko_persons = "iroko.persons.ext:IrokoPersons" +iroko_patents = "iroko.patents.ext:IrokoPatents" [tool.poetry.plugins."invenio_base.blueprints"] iroko = "iroko.theme.views:blueprint" @@ -130,18 +131,23 @@ iroko = "iroko.records:iroko" invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles" iroko_organizations = "iroko.organizations:IrokoOrganizations" iroko_persons = "iroko.persons:IrokoPersons" +iroko_patents = "iroko.patents:IrokoPatents" [tool.poetry.plugins."invenio_jsonschemas.schemas"] iroko = "iroko.records.jsonschemas" sources = "iroko.sources.schemas" organizations = "iroko.organizations.jsonschemas" persons = "iroko.persons.jsonschemas" +patents = "iroko.patents.jsonschemas" + [tool.poetry.plugins."invenio_search.mappings"] records = "iroko.records.mappings" sources = "iroko.sources.mappings" organizations = "iroko.organizations.mappings" persons = "iroko.persons.mappings" +patents = 
"iroko.patents.mappings" + [tool.poetry.plugins."invenio_admin.views"] vocabulary_admin = "iroko.vocabularies.admin:vocabularies_adminview" @@ -173,6 +179,8 @@ invenio_userprofiles = "iroko.userprofiles.rest:api_blueprint" iroko_records = "iroko.records.rest:api_blueprint" iroko_organizations = "iroko.organizations.rest:api_blueprint" iroko_persons = "iroko.persons.rest:api_blueprint" +iroko_patents = "iroko.patents.rest:api_blueprint" + iroko_evaluations = "iroko.evaluations.rest:api_blueprint" [tool.poetry.plugins."invenio_celery.tasks"] @@ -185,6 +193,7 @@ recoai = "iroko.pidstore.fetchers:iroko_source_oai_fetcher" srcid = "iroko.pidstore.fetchers:iroko_source_uuid_fetcher" orgid = "iroko.pidstore.fetchers:organization_uuid_fetcher" perid = "iroko.pidstore.fetchers:person_uuid_fetcher" +patid = "iroko.pidstore.fetchers:patent_uuid_fetcher" [tool.poetry.plugins."invenio_pidstore.minters"] irouid = "iroko.pidstore.minters:iroko_uuid_minter" @@ -193,6 +202,7 @@ recoai = "iroko.pidstore.minters:iroko_source_oai_minter" srcid = "iroko.pidstore.minters:iroko_source_uuid_minter" orgid = "iroko.pidstore.minters:organization_uuid_minter" perid = "iroko.pidstore.minters:person_uuid_minter" +patid = "iroko.pidstore.minters:patent_uuid_minter" [tool.poetry.plugins."invenio_db.alembic"] invenio_userprofiles = "iroko.userprofiles:alembic" diff --git a/run b/run index 25506955..9415b3d4 100755 --- a/run +++ b/run @@ -18,7 +18,7 @@ export FLASK_ENV=development invenio run \ --cert "$script_path"/docker/nginx/test.crt \ --key "$script_path"/docker/nginx/test.key\ - --host "10.16.64.222" \ + # --host "10.16.64.222" \ & pid_server=$! 
#trap 'kill $pid_celery $pid_server &>/dev/null' EXIT diff --git a/setup.py b/setup.py index 9ebb84eb..3cdf0d7e 100644 --- a/setup.py +++ b/setup.py @@ -142,6 +142,8 @@ 'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles', 'iroko_organizations = iroko.organizations.ext:IrokoOrganizations', 'iroko_persons = iroko.persons.ext:IrokoPersons', + 'iroko_patents = iroko.patents.ext:IrokoPatents', + ], 'invenio_base.blueprints': [ 'iroko = iroko.theme.views:blueprint', @@ -168,18 +170,21 @@ 'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles', 'iroko_organizations = iroko.organizations:IrokoOrganizations', 'iroko_persons = iroko.persons:IrokoPersons', + 'iroko_patents = iroko.patents:IrokoPatents', ], 'invenio_jsonschemas.schemas': [ 'iroko = iroko.records.jsonschemas', 'sources = iroko.sources.schemas', 'organizations = iroko.organizations.jsonschemas', 'persons = iroko.persons.jsonschemas', + 'patents = iroko.patents.jsonschemas', ], 'invenio_search.mappings': [ 'records = iroko.records.mappings', 'sources = iroko.sources.mappings', 'organizations = iroko.organizations.mappings', 'persons = iroko.persons.mappings', + 'patents = iroko.patents.mappings', ], 'invenio_admin.views': [ 'vocabulary_admin = iroko.vocabularies.admin:vocabularies_adminview', @@ -214,6 +219,7 @@ 'iroko_records = iroko.records.rest:api_blueprint', 'iroko_organizations = iroko.organizations.rest:api_blueprint', 'iroko_persons = iroko.persons.rest:api_blueprint', + 'iroko_patents = iroko.patents.rest:api_blueprint', 'iroko_evaluations = iroko.evaluations.rest:api_blueprint', ], 'invenio_celery.tasks': [ @@ -226,6 +232,7 @@ 'srcid = iroko.pidstore.fetchers:iroko_source_uuid_fetcher', 'orgid = iroko.pidstore.fetchers:organization_uuid_fetcher', 'perid = iroko.pidstore.fetchers:person_uuid_fetcher', + 'patid = iroko.pidstore.fetchers:patents_uuid_fetcher', ], 'invenio_pidstore.minters': [ 'irouid = iroko.pidstore.minters:iroko_uuid_minter', @@ -234,6 +241,7 @@ 'srcid = 
iroko.pidstore.minters:iroko_source_uuid_minter', 'orgid = iroko.pidstore.minters:organization_uuid_minter', 'perid = iroko.pidstore.minters:person_uuid_minter', + 'patid = iroko.pidstore.minters:patents_uuid_minter', ], 'invenio_db.alembic': [ 'invenio_userprofiles = iroko.userprofiles:alembic', From 04f38d51c60ae8a812d9e7905f53a4e1f2011235 Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Wed, 1 Nov 2023 20:15:31 +0100 Subject: [PATCH 4/7] Endpoints --- iroko/config.py | 42 +++++++++++++++ iroko/patents/api.py | 2 +- iroko/patents/importaciones/ocpi.py | 3 +- iroko/patents/rest.py | 82 +++++++++++++++++++++++++++-- iroko/patents/search.py | 2 +- 5 files changed, 124 insertions(+), 7 deletions(-) diff --git a/iroko/config.py b/iroko/config.py index b9c411cc..009357fb 100755 --- a/iroko/config.py +++ b/iroko/config.py @@ -398,6 +398,33 @@ def _(x): } } }, + 'patents': { + 'filters': { + 'status': terms_filter('classification'), + 'country': terms_filter('country'), + 'language': terms_filter('language') + }, + 'aggs': { + 'classification': { + 'terms': { + 'field': 'classification', + 'size': 5 + } + }, + 'country': { + 'terms': { + 'field': 'country', + 'size': 5 + } + }, + 'language': { + 'terms': { + 'field': 'language', + 'size': 5 + } + } + } + }, 'persons': { 'filters': { 'gender': terms_filter('gender'), @@ -471,6 +498,20 @@ def _(x): 'order': 2 } }, + 'patents': { + 'bestmatch': { + 'title': _('Best match'), + 'fields': ['_score'], + 'default_order': 'desc', + 'order': 1 + }, + 'mostrecent': { + 'title': _('Most recent'), + 'fields': ['-_created'], + 'default_order': 'asc', + 'order': 2 + } + }, 'persons': { 'bestmatch': { 'title': _('Best match'), @@ -486,6 +527,7 @@ def _(x): } } } + """Setup sorting options.""" RECORDS_REST_DEFAULT_SORT: { diff --git a/iroko/patents/api.py b/iroko/patents/api.py index bd2c6fec..79fe16c5 100755 --- a/iroko/patents/api.py +++ b/iroko/patents/api.py @@ -84,7 +84,7 @@ def create_or_update(cls, pat_uuid, data, **kwargs): if 
not pat: print("no pids found, creating patent") created_pat = cls.create(data, iroko_pid_type=pids.PATENT_PID_TYPE, - iroko_pid_value=pat_uuid) + iroko_pid_value=pat_uuid) pat = created_pat msg = 'created' diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py index 8ce936d0..76b8f6b6 100644 --- a/iroko/patents/importaciones/ocpi.py +++ b/iroko/patents/importaciones/ocpi.py @@ -41,12 +41,13 @@ def getData(url): patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() json_patent = json.dumps(patent) - print(json_patent) + # print(json_patent) return soup def nextPage(soup): a = soup.find(attrs= {'id': 'id14'}) url = a['href'] + print(url) return url def pagination(url): diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index 3b6b4968..11459870 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -7,11 +7,16 @@ import os from flask import Blueprint, flash, jsonify, make_response, request +from elasticsearch.exceptions import NotFoundError +from invenio_pidstore.resolver import Resolver +from invenio_pidstore.models import PersistentIdentifier +from invenio_indexer.api import RecordIndexer from iroko.patents.api import PatentRecord from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext from iroko.patents.serializers import json_v1_response from iroko.pidstore import pids +from iroko.utils import IrokoResponseStatus, iroko_json_response api_blueprint = Blueprint( 'iroko_api_patents', @@ -40,9 +45,9 @@ def get_patent_by_pid_canonical(): 'ERROR': 'no pid found'.format(_id) }) -@api_blueprint.route('/import/', methods=['POST']) +@api_blueprint.route('/import', methods=['POST']) # @require_api_auth() -def upload_file(per_uuid): +def upload_file(): # /tmp/iroko/person/.[csv|json] # try: if request.method == 'POST': @@ -62,14 +67,14 @@ def upload_file(per_uuid): if file and allowed_file(file.filename): if 
'csv'==get_ext(file.filename): json_path=csv_to_json(file) - PatentRecord.load_from_json_file(json_path, per_uuid) + PatentRecord.load_from_json_file(json_path) response = make_response(jsonify({'msg': 'success'})) return response, 201 else: filename=datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")+'.'+'json' file.save(os.path.join('./data', filename )) - PatentRecord.load_from_json_file(os.path.join('./data',filename),per_uuid ) + PatentRecord.load_from_json_file(os.path.join('./data',filename)) response = make_response(jsonify({'msg': 'success'})) return response ,201 else: @@ -78,3 +83,72 @@ def upload_file(per_uuid): # except Exception as e: # print(e) # return iroko_json_response(IrokoResponseStatus.ERROR, str(e), None, None) + +@api_blueprint.route('//edit', methods=['POST']) +def edit_patent(uuid): + """ + Dado un uuid modificar los datos de una patente + """ + try: + if not request.is_json: + raise Exception("No se especifican datos en formato json para la curacion") + input_data = request.json + print("//////////////////////////////////////////////////////") + print(input_data) + print("///////////////////////////////////////////////////////") + # org = org_json_v1.transform_record(input_data["id"], input_data) + print("-------------------------------------------------------------") + + + print("------------------------------------------------------------") + + pat, msg = PatentRecord.resolve_and_update(uuid, input_data) + + if not pat: + raise Exception("No se encontro record de patente") + + print("entra a la api de editar patentes...........................................") + return jsonify({ + 'SUCCES':"Patente modificada", + 'message':msg, + 'org':pat + }) + except Exception as e: + print(e) + return jsonify({ + 'ERROR': str(e), + }) + +@api_blueprint.route('/new', methods=['POST']) +def create_patent(): + try: + if not request.is_json: + raise Exception("No JSON data provided") + + input_data = request.json + pat, msg = 
PatentRecord.create(input_data, iroko_pid_type = pids.PATENT_PID_TYPE) + + + return jsonify({ + 'SUCCES':"Patente creada", + 'message':msg, + 'pat':pat + }) + + except Exception as e: + return jsonify({ + 'ERROR': str(e), + }) + +@api_blueprint.route('/delete/', methods=['DELETE']) +def delete_patent(uuid): + + record = PatentRecord.get_record_by_pid_value(uuid) + + if not record: + raise Exception("No se encontro record de patente") + + uuid.delete() + result = record.delete(force=False) + + return result diff --git a/iroko/patents/search.py b/iroko/patents/search.py index 298096e4..d81931f1 100644 --- a/iroko/patents/search.py +++ b/iroko/patents/search.py @@ -13,7 +13,7 @@ class PatentsSearch(RecordsSearch): """RecordsSearch for sources.""" class Meta: - """Search only on organizations index.""" + """Search only on patents index.""" index = "patents" doc_types = None From 829891555f93a345fa9fcc2fbcc5b6f6b9d38496 Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Wed, 8 Nov 2023 15:40:33 +0100 Subject: [PATCH 5/7] create, delete and update --- iroko/config.py | 7 ------- iroko/patents/api.py | 6 ++---- iroko/patents/importaciones/ocpi.py | 10 ++++++---- iroko/patents/mappings/__init__.py | 2 +- iroko/patents/permissions.py | 2 +- iroko/patents/rest.py | 28 ++++++++++++++++++++-------- 6 files changed, 30 insertions(+), 25 deletions(-) diff --git a/iroko/config.py b/iroko/config.py index 009357fb..43c3e01e 100755 --- a/iroko/config.py +++ b/iroko/config.py @@ -400,17 +400,10 @@ def _(x): }, 'patents': { 'filters': { - 'status': terms_filter('classification'), 'country': terms_filter('country'), 'language': terms_filter('language') }, 'aggs': { - 'classification': { - 'terms': { - 'field': 'classification', - 'size': 5 - } - }, 'country': { 'terms': { 'field': 'country', diff --git a/iroko/patents/api.py b/iroko/patents/api.py index 79fe16c5..f2c11374 100755 --- a/iroko/patents/api.py +++ b/iroko/patents/api.py @@ -91,12 +91,10 @@ def create_or_update(cls, pat_uuid, 
data, **kwargs): return pat, msg @classmethod - def delete(cls, data, vendor=None, delindex=True, force=False): + def delete(cls, pid, vendor=None, delindex=True, force=False): """Delete an IrokoRecord record.""" - assert data.get(cls.pid_uuid_field) - pid = data.get(cls.pid_uuid_field) record = cls.get_record_by_pid_value(pid) - pid.delete() + pid.replace(pid, '') result = record.delete(force=force) if delindex: try: diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py index 76b8f6b6..fddfb5e8 100644 --- a/iroko/patents/importaciones/ocpi.py +++ b/iroko/patents/importaciones/ocpi.py @@ -41,14 +41,16 @@ def getData(url): patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() json_patent = json.dumps(patent) - # print(json_patent) return soup def nextPage(soup): a = soup.find(attrs= {'id': 'id14'}) url = a['href'] - print(url) - return url + hfb = url.find(';') + jh = url.find('?') + cadena = url[hfb:jh] + k = url.replace(cadena, '') + return k def pagination(url): haySiguiente = True @@ -60,7 +62,7 @@ def pagination(url): except: haySiguiente = False - return 'Ok' + return 'ok' print(pagination(website)) diff --git a/iroko/patents/mappings/__init__.py b/iroko/patents/mappings/__init__.py index 3e569253..b94e34a2 100644 --- a/iroko/patents/mappings/__init__.py +++ b/iroko/patents/mappings/__init__.py @@ -13,7 +13,7 @@ """Mappings. Mappings define how organizations and their fields will be indexed in Elasticsearch. -The provided record-v1.0.0.json file is an example of how to index organizations +The provided record-v1.0.0.json file is an example of how to index patents in Elasticsearch. You need to provide one mapping per major version of Elasticsearch you want to support. 
""" diff --git a/iroko/patents/permissions.py b/iroko/patents/permissions.py index 072331c1..7d823823 100644 --- a/iroko/patents/permissions.py +++ b/iroko/patents/permissions.py @@ -18,7 +18,7 @@ curator_permission = Permission(RoleNeed('curator')) def can_edit_patent_factory(record, *args, **kwargs): - """Checks if logged user can update or delete person items. + """Checks if logged user can update or delete patent items. """ def can(self): if current_user.is_authenticated and curator_permission.can(): diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index 11459870..f68475a9 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -11,12 +11,17 @@ from invenio_pidstore.resolver import Resolver from invenio_pidstore.models import PersistentIdentifier from invenio_indexer.api import RecordIndexer +from flask_login import current_user +from invenio_oauth2server import require_api_auth from iroko.patents.api import PatentRecord from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext from iroko.patents.serializers import json_v1_response from iroko.pidstore import pids from iroko.utils import IrokoResponseStatus, iroko_json_response +from iroko.pidstore.pids import ( + IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas, + ) api_blueprint = Blueprint( 'iroko_api_patents', @@ -45,6 +50,7 @@ def get_patent_by_pid_canonical(): 'ERROR': 'no pid found'.format(_id) }) + @api_blueprint.route('/import', methods=['POST']) # @require_api_auth() def upload_file(): @@ -54,10 +60,10 @@ def upload_file(): print(request.__dict__) print('--------------------------------') print(request.files) - print('--------------------------------') - if 'file' not in request.files: - flash('No file part') - raise Exception("No file part") + # print('--------------------------------') + # if 'file' not in request.files: + # flash('No file part') + # raise Exception("No file part") file = request.files['file'] # If the user does not 
select a file, the browser submits an # empty file without a filename. @@ -126,7 +132,8 @@ def create_patent(): raise Exception("No JSON data provided") input_data = request.json - pat, msg = PatentRecord.create(input_data, iroko_pid_type = pids.PATENT_PID_TYPE) + pat= PatentRecord.create(input_data, iroko_pid_type=pids.PATENT_PID_TYPE) + msg = 'ok' return jsonify({ @@ -137,7 +144,7 @@ def create_patent(): except Exception as e: return jsonify({ - 'ERROR': str(e), + 'ERROR HOLA': str(e), }) @api_blueprint.route('/delete/', methods=['DELETE']) @@ -148,7 +155,12 @@ def delete_patent(uuid): if not record: raise Exception("No se encontro record de patente") - uuid.delete() - result = record.delete(force=False) + result = super(PatentRecord, record).delete(force=False) + # if delindex: + try: + RecordIndexer().delete(record) + except NotFoundError: + pass + return result From 93a2ffd9ed4c3f4d2142afc6bde689f23f56e4df Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Mon, 13 Nov 2023 15:15:14 +0100 Subject: [PATCH 6/7] register table --- iroko/config.py | 4 +- iroko/patents/api.py | 33 +++---- iroko/patents/register/marshmallow.py | 25 +++++ iroko/patents/register/model.py | 15 +++ iroko/patents/rest.py | 137 +++++++++++++++++++------- pyproject.toml | 2 + setup.py | 2 + 7 files changed, 162 insertions(+), 56 deletions(-) create mode 100644 iroko/patents/register/marshmallow.py create mode 100644 iroko/patents/register/model.py diff --git a/iroko/config.py b/iroko/config.py index 43c3e01e..a9a2a6a3 100755 --- a/iroko/config.py +++ b/iroko/config.py @@ -400,13 +400,13 @@ def _(x): }, 'patents': { 'filters': { - 'country': terms_filter('country'), + 'country': terms_filter('country.name'), 'language': terms_filter('language') }, 'aggs': { 'country': { 'terms': { - 'field': 'country', + 'field': 'country.name', 'size': 5 } }, diff --git a/iroko/patents/api.py b/iroko/patents/api.py index f2c11374..1599481f 100755 --- a/iroko/patents/api.py +++ b/iroko/patents/api.py @@ -35,23 
+35,22 @@ def load_from_json_file(cls, file_path): ) # per = PersonRecord.get_record_by_pid_value(per_pid) with open(file_path) as _file: - patents = json.load(_file, object_hook=remove_nulls) - a = 0 - for data in patents: - a = a + 1 - patent = PatentRecord(data) - del patent['_id'] - print(patent) - patentRecord = None - patentRecord, msg = cls.resolve_and_update(data=patent) - print(patentRecord) - if not patentRecord: - print("no pids found, creating patent") - patentRecord = cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE) - msg = 'created' - print('====================================', a) - - + patents = json.load(_file, object_hook=remove_nulls) + a = 0 + for data in patents: + a = a + 1 + patent = PatentRecord(data) + del patent['_id'] + print(patent) + patentRecord = None + patentRecord, msg = cls.resolve_and_update(data=patent) + print(patentRecord) + if not patentRecord: + print("no pids found, creating patent") + patentRecord = cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE) + msg = 'created' + print('====================================', a) + @classmethod def get_pat_by_pid(cls, pid_value, with_deleted=False): resolver = Resolver( diff --git a/iroko/patents/register/marshmallow.py b/iroko/patents/register/marshmallow.py new file mode 100644 index 00000000..19db3149 --- /dev/null +++ b/iroko/patents/register/marshmallow.py @@ -0,0 +1,25 @@ +from marshmallow import Schema, fields, post_load + +from iroko.evaluations.models import EvaluationState + + +class RegisterSchema(Schema): + + id = fields.Int() + userEmail = fields.Str(required=False, allow_none=True) + date = fields.DateTime() + patents = fields.Int() + + @post_load + def register_load(self, item, **kwargs): + item['userEmail'] = item['userEmail'] if 'userEmail' in item else '' + item['date'] = item['date'] if 'date' in item else '' + item['patents'] = item['patents'] if 'patents' in item else '' + return item + + +register_schema_many = RegisterSchema( + many=True, only=( + 
'id', 'userEmail', 'date', 'patents') + ) +register_schema = RegisterSchema(many=False) diff --git a/iroko/patents/register/model.py b/iroko/patents/register/model.py new file mode 100644 index 00000000..bcb24f71 --- /dev/null +++ b/iroko/patents/register/model.py @@ -0,0 +1,15 @@ +from invenio_accounts.models import User +from invenio_db import db +from sqlalchemy_utils.types import JSONType, UUIDType + +class Register(db.Model): + + __tablename__ = 'iroko_register' + + id = db.Column(db.Integer, primary_key=True) + userEmail = db.Column(db.String) + date = db.Column(db.DateTime, nullable=False) + patents = db.Column(db.Integer) + + #instancia del registro + data = db.Column(JSONType) diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index f68475a9..67b1d482 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -3,7 +3,8 @@ from __future__ import absolute_import, print_function -import datetime +from datetime import datetime +import json import os from flask import Blueprint, flash, jsonify, make_response, request @@ -13,7 +14,11 @@ from invenio_indexer.api import RecordIndexer from flask_login import current_user from invenio_oauth2server import require_api_auth +from invenio_db import db +from iroko.utils import remove_nulls +from iroko.patents.register.model import Register +from iroko.patents.register.marshmallow import register_schema, register_schema_many from iroko.patents.api import PatentRecord from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext from iroko.patents.serializers import json_v1_response @@ -52,43 +57,39 @@ def get_patent_by_pid_canonical(): @api_blueprint.route('/import', methods=['POST']) -# @require_api_auth() def upload_file(): - # /tmp/iroko/person/.[csv|json] - # try: - if request.method == 'POST': - print(request.__dict__) - print('--------------------------------') - print(request.files) - # print('--------------------------------') - # if 'file' not in request.files: - # flash('No file part') - # 
raise Exception("No file part") - file = request.files['file'] - # If the user does not select a file, the browser submits an - # empty file without a filename. - if file.filename == '': - flash('No selected file') - raise Exception("Not file in request") - if file and allowed_file(file.filename): - if 'csv'==get_ext(file.filename): - json_path=csv_to_json(file) - PatentRecord.load_from_json_file(json_path) - response = make_response(jsonify({'msg': 'success'})) - return response, 201 - else: - filename=datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")+'.'+'json' - - file.save(os.path.join('./data', filename )) - PatentRecord.load_from_json_file(os.path.join('./data',filename)) - response = make_response(jsonify({'msg': 'success'})) - return response ,201 - else: - raise Exception("no valid file extension") - - # except Exception as e: - # print(e) - # return iroko_json_response(IrokoResponseStatus.ERROR, str(e), None, None) + try: + if not request.is_json: + raise Exception("No JSON data provided") + + + input_data = request.json + # patents = json.load(input_data, object_hook=remove_nulls) + # print('patents',patents) + a = 0 + for data in input_data: + print(data) + a = a + 1 + patent = PatentRecord(data) + print(patent) + patentRecord = None + patentRecord, msg = PatentRecord.resolve_and_update(data=patent) + print(patentRecord) + if not patentRecord: + print("no pids found, creating patent") + patentRecord = PatentRecord.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE) + msg = 'created' + + return jsonify({ + 'SUCCES':"Patentes creadas", + 'message':msg, + }) + + except Exception as e: + return jsonify({ + 'ERROR': str(e), + }) + @api_blueprint.route('//edit', methods=['POST']) def edit_patent(uuid): @@ -164,3 +165,65 @@ def delete_patent(uuid): return result + +@api_blueprint.route('/register', methods=['GET']) +def get_register(): + try: + count = int(request.args.get('size')) if request.args.get('size') else 10 + page = int(request.args.get('page')) 
if request.args.get('page') else 1 + + if page < 1: + page = 1 + offset = count * (page - 1) + limit = offset + count + + result = Register.query.all() + total = len(result) + + return iroko_json_response( + IrokoResponseStatus.SUCCESS, \ + 'ok', 'register', \ + { + 'data': register_schema_many.dump(result[offset:limit]), + 'total': total + } + ) + + except Exception as e: + msg = str(e) + return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None) + +@api_blueprint.route('/register/new', methods=['POST']) +def create_register(): + try: + input_data = request.json + register = Register() + register.data = input_data + register.userEmail = input_data.get("userEmail") + register.date = datetime.now() + register.patents = input_data.get("patents") + + db.session.add(register) + db.session.commit() + + msg = "New Register Created" + + except Exception as e: + msg = str(e) + return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None) + + return iroko_json_response( + IrokoResponseStatus.SUCCESS, \ + msg, 'register', \ + register_schema.dump(register) + ) + +@api_blueprint.route('/register/delete/', methods=['DELETE']) +def delete_register(id): + register = Register.query.filter_by(id = id).delete() + db.session.commit() + + return make_response("Eliminado", 204) + + + diff --git a/pyproject.toml b/pyproject.toml index cb7e8e3a..3f18ad3d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -168,6 +168,7 @@ iroko_sources = "iroko.sources.models" iroko_harvester = "iroko.harvester.models" invenio_userprofiles = "iroko.userprofiles.models" iroko_evaluations = "iroko.evaluations.models" +iroko_register = "iroko.patents.register.model" [tool.poetry.plugins."invenio_base.api_blueprints"] iroko_taxonomy = "iroko.vocabularies.rest:api_blueprint" @@ -180,6 +181,7 @@ iroko_records = "iroko.records.rest:api_blueprint" iroko_organizations = "iroko.organizations.rest:api_blueprint" iroko_persons = "iroko.persons.rest:api_blueprint" iroko_patents = 
"iroko.patents.rest:api_blueprint" +iroko_register = "iroko.patents.rest:api_blueprint" iroko_evaluations = "iroko.evaluations.rest:api_blueprint" diff --git a/setup.py b/setup.py index 3cdf0d7e..6287ea09 100644 --- a/setup.py +++ b/setup.py @@ -208,6 +208,7 @@ 'iroko_harvester = iroko.harvester.models', 'invenio_userprofiles = iroko.userprofiles.models', 'iroko_evaluations = iroko.evaluations.models', + 'iroko_register = iroko.patents.register.model', ], 'invenio_base.api_blueprints': [ 'iroko_taxonomy = iroko.vocabularies.rest:api_blueprint', @@ -221,6 +222,7 @@ 'iroko_persons = iroko.persons.rest:api_blueprint', 'iroko_patents = iroko.patents.rest:api_blueprint', 'iroko_evaluations = iroko.evaluations.rest:api_blueprint', + 'iroko_register = iroko.patents.rest:api_blueprint', ], 'invenio_celery.tasks': [ 'iroko_harvester = iroko.harvester.tasks' From 419fbdf3eab48e3e8a8adca965631647d6f60d31 Mon Sep 17 00:00:00 2001 From: laurymesa01 Date: Mon, 12 Feb 2024 20:37:21 +0100 Subject: [PATCH 7/7] Finished --- iroko/api.py | 24 ++- iroko/patents/api.py | 192 +++++++++++++++++- iroko/patents/importaciones/ocpi.py | 1 + .../mappings/v6/patents/patent-v1.0.0.json | 9 +- iroko/patents/permissions.py | 1 + iroko/patents/rest.py | 63 +++--- 6 files changed, 239 insertions(+), 51 deletions(-) diff --git a/iroko/api.py b/iroko/api.py index 9faa76b7..28a415d8 100644 --- a/iroko/api.py +++ b/iroko/api.py @@ -11,6 +11,7 @@ from invenio_records.api import Record from invenio_records_files.api import Record from sqlalchemy.exc import NoResultFound +import json from iroko.pidstore import pids from iroko.pidstore.minters import identifiers_minter, iroko_uuid_minter @@ -68,6 +69,7 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs): persistent_identifier, rec = resolver.resolve(str(iroko_uuid)) if rec: print("{0}={1} found".format(pid_type, iroko_uuid)) + print('REC',rec) rec.update(data) # .update(data, dbcommit=dbcommit, reindex=reindex) return rec, 'updated' @@ 
-85,16 +87,20 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs): str(identifier[IDENTIFIERS_FIELD_VALUE]) ) print('<<<<<<<<<<<<<<<<<<') - print('rec= ', rec) + print('rec= ', json.dumps(rec, indent=3)) if rec: - print( - "{0}={1} found".format( - schema, str( - identifier[IDENTIFIERS_FIELD_VALUE] - ) - ) - ) + # print( + # "{0}={1} found".format( + # schema, str( + # identifier[IDENTIFIERS_FIELD_VALUE] + # ) + # ) + # ) + # print("===================", data) + print(json.dumps(data, indent=3)) rec.update(data) + print('========================================') + print('>>>>>>>>>>>>>>>>>>>>') print('rec updated: ', rec) return rec, 'updated' @@ -182,6 +188,8 @@ def update(self, data=None, dbcommit=True, reindex=True, override_pids=True): """ Update data for record. override_pids, if True """ + print(json.dumps(data, indent=3), type(data)) + print('begin update') self['_save_info_updated'] = str(date.today()) diff --git a/iroko/patents/api.py b/iroko/patents/api.py index 1599481f..fd7af644 100755 --- a/iroko/patents/api.py +++ b/iroko/patents/api.py @@ -9,6 +9,10 @@ from invenio_pidstore.resolver import Resolver from invenio_pidstore.models import PersistentIdentifier from invenio_indexer.api import RecordIndexer +from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError +from sqlalchemy.exc import NoResultFound + + from iroko.api import IrokoBaseRecord from iroko.organizations.api import OrganizationRecord @@ -16,6 +20,7 @@ from iroko.pidstore import pids from iroko.utils import remove_nulls from iroko.pidstore.pids import ( + IDENTIFIERS_FIELD, IDENTIFIERS_FIELD_VALUE, IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas, ) @@ -50,11 +55,11 @@ def load_from_json_file(cls, file_path): patentRecord = cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE) msg = 'created' print('====================================', a) - + @classmethod def get_pat_by_pid(cls, pid_value, with_deleted=False): resolver = 
Resolver( - pid_type=PATENT_PID_TYPE, + pid_type='doi', object_type=IROKO_OBJECT_TYPE, getter=cls.get_record, ) @@ -63,14 +68,14 @@ def get_pat_by_pid(cls, pid_value, with_deleted=False): except Exception: pass - for pid_type in identifiers_schemas: - try: - resolver.pid_type = pid_type - schemapid, pat = resolver.resolve(pid_value) - pid = PersistentIdentifier.get(PATENT_PID_TYPE, pat['id']) - return pid, pat - except Exception as e: - pass + # for pid_type in identifiers_schemas: + # try: + # resolver.pid_type = pid_type + # schemapid, pat = resolver.resolve(pid_value) + # pid = PersistentIdentifier.get(PATENT_PID_TYPE, pat['id']) + # return pid, pat + # except Exception as e: + # pass return None, None @classmethod @@ -89,6 +94,66 @@ def create_or_update(cls, pat_uuid, data, **kwargs): return pat, msg + + @classmethod + def update_imported(cls, pat_uuid=None, data={}): + resolver = Resolver( + pid_type=pids.RECORD_PID_TYPE, + object_type=IROKO_OBJECT_TYPE, + getter=cls.get_record, + ) + if IDENTIFIERS_FIELD in data: # Si no lo encontro por el uuid, igual se intenta buscar + # desde cualquier otri pid + for schema in identifiers_schemas: + for identifier in data[IDENTIFIERS_FIELD]: + if schema == identifier[IDENTIFIERS_FIELD_TYPE]: + # print("identifier ------ ", identifier) + resolver.pid_type = schema + try: + persistent_identifier, rec = resolver.resolve( + str(identifier[IDENTIFIERS_FIELD_VALUE]) + ) + print('<<<<<<<<<<<<<<<<<<') + print('rec= ', json.dumps(rec, indent=3)) + print('data= ', json.dumps(rec, indent=3)) + if rec: + resolver.pid_type = pids.PATENT_PID_TYPE + uuid = rec["id"] + print(uuid) + try: + persistent_identifier, rec = resolver.resolve(str(uuid)) + print('rec= ', json.dumps(rec, indent=3)) + if rec: + print('REC',rec) + rec.update(data) + return rec, 'updated' + except Exception: + pass + print('========================================') + + print('>>>>>>>>>>>>>>>>>>>>') + print('rec updated: ', rec) + return rec, 'updated' + except 
PIDDoesNotExistError as pidno: + print( + "PIDDoesNotExistError: {0} == {1}".format( + schema, + str( + identifier[ + IDENTIFIERS_FIELD_VALUE] + ) + ) + ) + except (PIDDeletedError, NoResultFound) as ex: + cls.__delete_pids_without_object(data[IDENTIFIERS_FIELD]) + except Exception as e: + print('-------------------------------') + # print(str(e)) + print(traceback.format_exc()) + print('-------------------------------') + pass + return None, None + @classmethod def delete(cls, pid, vendor=None, delindex=True, force=False): """Delete an IrokoRecord record.""" @@ -102,6 +167,113 @@ def delete(cls, pid, vendor=None, delindex=True, force=False): pass return result + + def fix_patents_imported(patent): + if 'identifiers' in patent: + patent['identifiers'] = patent['identifiers'] + + if 'country' in patent: + patent['country'] = patent['country'] + else: + patent['country'] = {'code': '', 'name': ''} + + if 'affiliations' in patent: + patent['affiliations'] = patent['affiliations'] + else: + patent['affiliations'] = [] + + if 'authors' in patent: + patent['authors'] = patent['authors'] + else: + patent['authors'] = [] + + if 'language' in patent: + patent['language'] = patent['language'] + else: + patent['language'] = '' + + if 'classification' in patent: + patent['classification'] = patent['classification'] + else: + patent['classification'] = '' + + if 'link' in patent: + patent['link'] = patent['link'] + else: + patent['link'] = '' + + if 'summary' in patent: + patent['summary'] = patent['summary'] + else: + patent['summary'] = '' + + return patent + + def fix_gp_imported(patent): + if 'id' in patent: + identifiers = [] + identifiers.append({ + 'idtype': 'doi', + 'value': patent['id'] + }) + patent['identifiers'] = identifiers + del patent['id'] + + if 'assignee' in patent: + affiliations = [] + for affiliation in patent['assignee']: + affiliations.append({ + 'identifiers': [], + 'name': affiliation + }) + patent['affiliations'] = affiliations + del 
patent['assignee'] + + else : + patent['affiliations'] = [] + + if 'author' in patent and len(patent['author']) > 0: + authors = [] + for author in patent['author']: + authors.append({ + 'identifiers': [], + 'name': author + }) + patent['authors'] = authors + del patent['author'] + + else : + patent['authors'] = [] + + patent['language'] = '' + patent['country'] = {'code': '', 'name': ''} + patent['classification'] = '' + del patent[''] + + if 'filing/creation date' in patent: + patent['creation_date'] = patent['filing/creation date'] + del patent['filing/creation date'] + + if 'grant date' in patent: + patent['grant_date'] = patent['grant date'] + del patent['grant date'] + + if 'priority date' in patent: + del patent['priority date'] + + if 'publication date' in patent: + patent['publication_date'] = patent['publication date'] + del patent['publication date'] + + if 'result link' in patent: + patent['link'] = patent['result link'] + del patent['result link'] + + return patent + + + + def fixture_spi_fields(person: PersonRecord, org: OrganizationRecord): """hard code fixtures of spi data, coming from human resources of cuban institutions """ country_code = 'cu' diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py index fddfb5e8..b26cb968 100644 --- a/iroko/patents/importaciones/ocpi.py +++ b/iroko/patents/importaciones/ocpi.py @@ -41,6 +41,7 @@ def getData(url): patent["publication_date"] = row.find_all('td')[6].get_text().rstrip() patent["legal_status"] = row.find_all('td')[11].get_text().rstrip() json_patent = json.dumps(patent) + print(json_patent) return soup def nextPage(soup): diff --git a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json index 010122de..6a3ac3d0 100644 --- a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json +++ b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json @@ -23,7 +23,8 @@ } }, "title": { - "type": "keyword" + "type": "text", + 
"analyzer": "spanish" }, "authors": { "type": "object", @@ -64,10 +65,12 @@ } }, "summary": { - "type": "keyword" + "type": "text", + "analyzer": "spanish" }, "classification": { - "type": "keyword" + "type": "text", + "analyzer": "spanish" }, "claims": { "type": "keyword" diff --git a/iroko/patents/permissions.py b/iroko/patents/permissions.py index 7d823823..112e6442 100644 --- a/iroko/patents/permissions.py +++ b/iroko/patents/permissions.py @@ -16,6 +16,7 @@ from invenio_access import Permission curator_permission = Permission(RoleNeed('curator')) +admin_permission = Permission(RoleNeed('admin')) def can_edit_patent_factory(record, *args, **kwargs): """Checks if logged user can update or delete patent items. diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py index 67b1d482..76d45e4b 100644 --- a/iroko/patents/rest.py +++ b/iroko/patents/rest.py @@ -3,7 +3,7 @@ from __future__ import absolute_import, print_function -from datetime import datetime +from datetime import datetime, date import json import os @@ -16,7 +16,10 @@ from invenio_oauth2server import require_api_auth from invenio_db import db from iroko.utils import remove_nulls +from flask_principal import RoleNeed +from invenio_access import Permission +from iroko.api import IrokoBaseRecord from iroko.patents.register.model import Register from iroko.patents.register.marshmallow import register_schema, register_schema_many from iroko.patents.api import PatentRecord @@ -61,35 +64,30 @@ def upload_file(): try: if not request.is_json: raise Exception("No JSON data provided") - - input_data = request.json - # patents = json.load(input_data, object_hook=remove_nulls) - # print('patents',patents) - a = 0 + print('=======================', input_data) for data in input_data: - print(data) - a = a + 1 - patent = PatentRecord(data) - print(patent) - patentRecord = None - patentRecord, msg = PatentRecord.resolve_and_update(data=patent) - print(patentRecord) + if 'assignee' in data: + patent = 
PatentRecord.fix_gp_imported(data) + else: + patent = PatentRecord.fix_patents_imported(data) + patentRecord, msg = PatentRecord.resolve_and_update(data = patent) + print('aaaaaaaaaaa',patentRecord) if not patentRecord: print("no pids found, creating patent") patentRecord = PatentRecord.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE) msg = 'created' - return jsonify({ - 'SUCCES':"Patentes creadas", - 'message':msg, - }) - except Exception as e: return jsonify({ - 'ERROR': str(e), + 'ERROR HOLA': str(e), }) + return jsonify({ + 'SUCCES':"Patentes creadas", + 'message':msg, + }) + @api_blueprint.route('//edit', methods=['POST']) def edit_patent(uuid): @@ -100,14 +98,8 @@ def edit_patent(uuid): if not request.is_json: raise Exception("No se especifican datos en formato json para la curacion") input_data = request.json - print("//////////////////////////////////////////////////////") print(input_data) - print("///////////////////////////////////////////////////////") # org = org_json_v1.transform_record(input_data["id"], input_data) - print("-------------------------------------------------------------") - - - print("------------------------------------------------------------") pat, msg = PatentRecord.resolve_and_update(uuid, input_data) @@ -133,9 +125,18 @@ def create_patent(): raise Exception("No JSON data provided") input_data = request.json + id = input_data['identifiers'][0]['value'] + pid, patent = PatentRecord.get_pat_by_pid(id) + print('PID',pid) + + + if pid: + raise Exception("Patente existente") + pat= PatentRecord.create(input_data, iroko_pid_type=pids.PATENT_PID_TYPE) msg = 'ok' + print('PAT',pat) return jsonify({ 'SUCCES':"Patente creada", @@ -145,21 +146,23 @@ def create_patent(): except Exception as e: return jsonify({ - 'ERROR HOLA': str(e), + 'ERROR': str(e), }) @api_blueprint.route('/delete/', methods=['DELETE']) def delete_patent(uuid): - record = PatentRecord.get_record_by_pid_value(uuid) + record = 
IrokoBaseRecord.get_record_by_pid_value(uuid) if not record: raise Exception("No se encontro record de patente") - result = super(PatentRecord, record).delete(force=False) + result = super(IrokoBaseRecord, record).delete(force=False) + db.session.commit() # if delindex: try: RecordIndexer().delete(record) + db.session.commit() except NotFoundError: pass @@ -200,7 +203,7 @@ def create_register(): register = Register() register.data = input_data register.userEmail = input_data.get("userEmail") - register.date = datetime.now() + register.date = input_data.get("date") register.patents = input_data.get("patents") db.session.add(register) @@ -215,7 +218,7 @@ def create_register(): return iroko_json_response( IrokoResponseStatus.SUCCESS, \ msg, 'register', \ - register_schema.dump(register) + register_schema.dump(register), ) @api_blueprint.route('/register/delete/', methods=['DELETE'])