diff --git a/iroko/api.py b/iroko/api.py index 9faa76b7..28a415d8 100644 --- a/iroko/api.py +++ b/iroko/api.py @@ -11,6 +11,7 @@ from invenio_records.api import Record from invenio_records_files.api import Record from sqlalchemy.exc import NoResultFound +import json from iroko.pidstore import pids from iroko.pidstore.minters import identifiers_minter, iroko_uuid_minter @@ -68,6 +69,7 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs): persistent_identifier, rec = resolver.resolve(str(iroko_uuid)) if rec: print("{0}={1} found".format(pid_type, iroko_uuid)) + print('REC',rec) rec.update(data) # .update(data, dbcommit=dbcommit, reindex=reindex) return rec, 'updated' @@ -85,16 +87,20 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs): str(identifier[IDENTIFIERS_FIELD_VALUE]) ) print('<<<<<<<<<<<<<<<<<<') - print('rec= ', rec) + print('rec= ', json.dumps(rec, indent=3)) if rec: - print( - "{0}={1} found".format( - schema, str( - identifier[IDENTIFIERS_FIELD_VALUE] - ) - ) - ) + # print( + # "{0}={1} found".format( + # schema, str( + # identifier[IDENTIFIERS_FIELD_VALUE] + # ) + # ) + # ) + # print("===================", data) + print(json.dumps(data, indent=3)) rec.update(data) + print('========================================') + print('>>>>>>>>>>>>>>>>>>>>') print('rec updated: ', rec) return rec, 'updated' @@ -182,6 +188,8 @@ def update(self, data=None, dbcommit=True, reindex=True, override_pids=True): """ Update data for record. 
override_pids, if True """ + print(json.dumps(data, indent=3), type(data)) + print('begin update') self['_save_info_updated'] = str(date.today()) diff --git a/iroko/config.py b/iroko/config.py index a7373192..a9a2a6a3 100755 --- a/iroko/config.py +++ b/iroko/config.py @@ -28,13 +28,17 @@ from iroko.organizations.api import OrganizationRecord from iroko.organizations.permissions import can_edit_organization_factory from iroko.organizations.search import OrganizationSearch +from iroko.patents.api import PatentRecord +from iroko.patents.search import PatentsSearch +from iroko.patents.permissions import can_edit_patent_factory from iroko.persons.api import PersonRecord from iroko.persons.permissions import can_edit_person_factory from iroko.persons.search import PersonsSearch from iroko.pidstore import pids as pids from iroko.pidstore.pids import ( ORGANIZATION_PID_FETCHER, ORGANIZATION_PID_MINTER, - ORGANIZATION_PID_TYPE, PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE, + ORGANIZATION_PID_TYPE, PATENT_PID_FETCHER, PATENT_PID_MINTER, PATENT_PID_TYPE, + PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE, ) from iroko.records.api import IrokoRecord from iroko.records.search import IrokoRecordSearch @@ -122,6 +126,10 @@ def _(x): _ORG_CONVERTER = ( 'pid(orgid, record_class="iroko.organizations.api.OrganizationRecord")' ) +_PATENT_CONVERTER = ( + 'pid(patid, record_class="iroko.patents.api.PatentRecord")' +) + _PERSON_CONVERTER = ( 'pid(perid, record_class="iroko.persons.api.PersonRecord")' ) @@ -224,6 +232,37 @@ def _(x): 'delete_permission_factory_imp': can_edit_organization_factory, 'list_permission_factory_imp': allow_all }, + 'patid': { + 'pid_type': PATENT_PID_TYPE, + 'pid_minter': PATENT_PID_MINTER, + 'pid_fetcher': PATENT_PID_FETCHER, + 'default_endpoint_prefix': True, + 'record_class': PatentRecord, + 'search_class': PatentsSearch, + 'indexer_class': RecordIndexer, + 'record_serializers': { + 'application/json': ('iroko.patents.serializers' + 
':json_v1_response'), + }, + 'search_serializers': { + 'application/json': ('iroko.patents.serializers' + ':json_v1_search'), + }, + 'record_loaders': { + 'application/json': ('iroko.patents.loaders' + ':json_v1'), + }, + 'list_route': '/search/patents/', + 'item_route': '/pid/patent/<{0}:pid_value>'.format(_PATENT_CONVERTER), + 'default_media_type': 'application/json', + 'max_result_window': 10000, + 'error_handlers': {}, + 'create_permission_factory_imp': can_edit_patent_factory, + 'read_permission_factory_imp': check_elasticsearch, + 'update_permission_factory_imp': can_edit_patent_factory, + 'delete_permission_factory_imp': can_edit_patent_factory, + 'list_permission_factory_imp': allow_all + }, 'perid': { 'pid_type': PERSON_PID_TYPE, 'pid_minter': PERSON_PID_MINTER, @@ -359,6 +398,26 @@ def _(x): } } }, + 'patents': { + 'filters': { + 'country': terms_filter('country.name'), + 'language': terms_filter('language') + }, + 'aggs': { + 'country': { + 'terms': { + 'field': 'country.name', + 'size': 5 + } + }, + 'language': { + 'terms': { + 'field': 'language', + 'size': 5 + } + } + } + }, 'persons': { 'filters': { 'gender': terms_filter('gender'), @@ -432,6 +491,20 @@ def _(x): 'order': 2 } }, + 'patents': { + 'bestmatch': { + 'title': _('Best match'), + 'fields': ['_score'], + 'default_order': 'desc', + 'order': 1 + }, + 'mostrecent': { + 'title': _('Most recent'), + 'fields': ['-_created'], + 'default_order': 'asc', + 'order': 2 + } + }, 'persons': { 'bestmatch': { 'title': _('Best match'), @@ -447,6 +520,7 @@ def _(x): } } } + """Setup sorting options.""" RECORDS_REST_DEFAULT_SORT: { @@ -462,6 +536,10 @@ def _(x): 'query': 'bestmatch', 'noquery': 'bestmatch', }, + 'patents': { + 'query': 'bestmatch', + 'noquery': 'bestmatch', + }, 'persons': { 'query': 'bestmatch', 'noquery': 'bestmatch', diff --git a/iroko/patents/__init__.py b/iroko/patents/__init__.py new file mode 100755 index 00000000..36c59dd1 --- /dev/null +++ b/iroko/patents/__init__.py @@ -0,0 +1,16 @@ 
class PatentRecord(IrokoBaseRecord):
    """Iroko record for a patent, validated against ``patents/patent-v1.0.0.json``."""

    _schema = "patents/patent-v1.0.0.json"

    @classmethod
    def load_from_json_file(cls, file_path):
        """Bulk import patents from a JSON file.

        The file is expected to hold a JSON array of patent objects. Each
        object is first passed to ``resolve_and_update``; when that finds no
        existing record a new one is created under the patent pid type.

        :param file_path: path of the JSON file to load.
        """
        with open(file_path, encoding='utf-8') as _file:
            patents = json.load(_file, object_hook=remove_nulls)

        for count, data in enumerate(patents, start=1):
            patent = PatentRecord(data)
            # '_id' is not part of the patent schema; tolerate entries that
            # do not carry it instead of failing the whole import.
            patent.pop('_id', None)
            print(patent)
            patent_record, _ = cls.resolve_and_update(data=patent)
            print(patent_record)
            if not patent_record:
                print("no pids found, creating patent")
                cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE)
            print('====================================', count)

    @classmethod
    def get_pat_by_pid(cls, pid_value, with_deleted=False):
        """Resolve a patent through its ``doi`` persistent identifier.

        :param pid_value: value of the doi pid; converted with ``str``.
        :param with_deleted: accepted for interface compatibility, unused.
        :returns: whatever the resolver returns (a ``(pid, record)`` pair),
            or ``(None, None)`` when resolution fails.
        """
        resolver = Resolver(
            pid_type='doi',
            object_type=IROKO_OBJECT_TYPE,
            getter=cls.get_record,
        )
        try:
            return resolver.resolve(str(pid_value))
        except Exception:
            # Best-effort lookup: any resolution failure means "not found".
            return None, None

    @classmethod
    def create_or_update(cls, pat_uuid, data, **kwargs):
        """Update the patent identified by ``pat_uuid`` or create it.

        :param pat_uuid: iroko uuid of the patent.
        :param data: patent metadata.
        :returns: ``(record, msg)`` where ``msg`` is the status reported by
            ``resolve_and_update`` or ``'created'``.
        """
        pat, msg = cls.resolve_and_update(pat_uuid, data)
        # Nothing resolved: the patent does not exist yet, so create it.
        if not pat:
            print("no pids found, creating patent")
            pat = cls.create(
                data,
                iroko_pid_type=pids.PATENT_PID_TYPE,
                iroko_pid_value=pat_uuid,
            )
            msg = 'created'
        return pat, msg

    @classmethod
    def update_imported(cls, pat_uuid=None, data=None):
        """Update an already stored patent from imported ``data``.

        Every identifier in ``data[IDENTIFIERS_FIELD]`` whose type is one of
        ``identifiers_schemas`` is resolved; the record found that way is
        resolved again through its ``id`` under the patent pid type and
        updated with ``data``.

        :param pat_uuid: accepted for interface compatibility, unused.
        :param data: imported patent metadata (defaults to an empty dict).
        :returns: ``(record, 'updated')`` or ``(None, None)`` when nothing
            could be resolved.
        """
        # Local import: the module does not import traceback at file level.
        import traceback

        data = {} if data is None else data
        resolver = Resolver(
            pid_type=pids.RECORD_PID_TYPE,
            object_type=IROKO_OBJECT_TYPE,
            getter=cls.get_record,
        )
        if IDENTIFIERS_FIELD not in data:
            return None, None

        for schema in identifiers_schemas:
            for identifier in data[IDENTIFIERS_FIELD]:
                if schema != identifier[IDENTIFIERS_FIELD_TYPE]:
                    continue
                resolver.pid_type = schema
                try:
                    persistent_identifier, rec = resolver.resolve(
                        str(identifier[IDENTIFIERS_FIELD_VALUE])
                    )
                    print('<<<<<<<<<<<<<<<<<<')
                    print('rec= ', json.dumps(rec, indent=3))
                    print('data= ', json.dumps(data, indent=3))
                    if rec:
                        resolver.pid_type = pids.PATENT_PID_TYPE
                        uuid = rec["id"]
                        print(uuid)
                        try:
                            persistent_identifier, rec = resolver.resolve(str(uuid))
                            print('rec= ', json.dumps(rec, indent=3))
                            if rec:
                                print('REC', rec)
                                rec.update(data)
                                return rec, 'updated'
                        except Exception:
                            # Keep going (best effort) but leave a trace
                            # instead of hiding the failure.
                            print(traceback.format_exc())
                        print('========================================')
                        print('>>>>>>>>>>>>>>>>>>>>')
                        print('rec updated: ', rec)
                        # NOTE(review): reached when the patent-pid lookup or
                        # the update failed; the status is kept as 'updated'
                        # for caller compatibility - confirm this is wanted.
                        return rec, 'updated'
                except PIDDoesNotExistError:
                    print(
                        "PIDDoesNotExistError: {0} == {1}".format(
                            schema,
                            str(identifier[IDENTIFIERS_FIELD_VALUE]),
                        )
                    )
                except (PIDDeletedError, NoResultFound):
                    # ``cls.__delete_pids_without_object`` written inside this
                    # class is name-mangled to a PatentRecord-private name
                    # that this class does not define.
                    # NOTE(review): presumably the helper is the private one
                    # of IrokoBaseRecord - confirm against iroko/api.py.
                    cleanup = getattr(
                        cls, '_IrokoBaseRecord__delete_pids_without_object', None
                    )
                    if cleanup is None:
                        print('no pid cleanup helper available for', schema)
                    else:
                        cleanup(data[IDENTIFIERS_FIELD])
                except Exception:
                    print('-------------------------------')
                    print(traceback.format_exc())
                    print('-------------------------------')
        return None, None

    @classmethod
    def delete(cls, pid, vendor=None, delindex=True, force=False):
        """Delete the patent record found for ``pid``.

        :param pid: pid value handed to ``get_record_by_pid_value``.
        :param vendor: accepted for interface compatibility, unused.
        :param delindex: when true, also remove the record from the index.
        :param force: forwarded to the record-level ``delete``.
        :returns: the result of the record-level ``delete``.
        """
        record = cls.get_record_by_pid_value(pid)
        if isinstance(record, PatentRecord):
            # This classmethod shadows the inherited instance-level delete,
            # so ``record.delete(force=...)`` would re-enter it and fail on
            # the missing ``pid``; dispatch to the parent implementation.
            # NOTE(review): assumes the parent ``delete`` is the
            # instance-level one accepting ``force`` - confirm.
            result = super(PatentRecord, record).delete(force=force)
        else:
            result = record.delete(force=force)
        if delindex:
            try:
                RecordIndexer().delete(record)
            except NotFoundError:
                # The record was never indexed; nothing to remove.
                pass
        return result

    @staticmethod
    def fix_patents_imported(patent):
        """Fill the optional fields of an imported patent with empty defaults.

        Existing values are kept; ``identifiers`` gets no default.

        :param patent: dict-like patent, modified in place.
        :returns: the same ``patent`` object.
        """
        patent.setdefault('country', {'code': '', 'name': ''})
        patent.setdefault('affiliations', [])
        patent.setdefault('authors', [])
        patent.setdefault('language', '')
        patent.setdefault('classification', '')
        patent.setdefault('link', '')
        patent.setdefault('summary', '')
        return patent

    @staticmethod
    def fix_gp_imported(patent):
        """Rename the keys of an imported row to the patent schema fields.

        ``id`` becomes a ``doi`` identifier, ``assignee`` and ``author``
        entries become affiliation/author objects, the date and link keys are
        renamed, ``priority date`` and the empty-named key are dropped, and
        ``language``, ``country`` and ``classification`` are reset to empty
        values.

        :param patent: dict-like patent, modified in place.
        :returns: the same ``patent`` object.
        """
        if 'id' in patent:
            patent['identifiers'] = [{'idtype': 'doi', 'value': patent.pop('id')}]

        patent['affiliations'] = [
            {'identifiers': [], 'name': name}
            for name in (patent.pop('assignee', None) or [])
        ]
        patent['authors'] = [
            {'identifiers': [], 'name': name}
            for name in (patent.pop('author', None) or [])
        ]

        patent['language'] = ''
        patent['country'] = {'code': '', 'name': ''}
        patent['classification'] = ''
        # The empty-named key is not always present; do not fail without it.
        patent.pop('', None)

        renames = (
            ('filing/creation date', 'creation_date'),
            ('grant date', 'grant_date'),
            ('publication date', 'publication_date'),
            ('result link', 'link'),
        )
        for source_key, target_key in renames:
            if source_key in patent:
                patent[target_key] = patent.pop(source_key)
        patent.pop('priority date', None)

        return patent


def fixture_spi_fields(person: PersonRecord, org: OrganizationRecord):
    """Apply hard-coded fixes to spi data coming from human resources of
    Cuban institutions.

    The person gets the country of the organization's first address (Cuba
    when it has none), its e-mail addresses registered through
    ``add_email_address``, ``lastName`` copied to ``last_name``, and its
    ``noCi`` / ``idExpediente`` identifiers rewritten as ``dni`` / ``hrid``.

    :returns: the same ``person`` object.
    """
    country_code = 'cu'
    country = 'Cuba'
    if 'addresses' in org and len(org['addresses']) > 0:
        country_code = org['addresses'][0]['country_code']
        country = org['addresses'][0]['country']
    person['country'] = {'code': country_code, 'name': country}

    if person.get('institutional_email'):
        person.add_email_address(person['institutional_email'])
    # Register every listed address (the loop used to re-add the
    # institutional one for each entry instead).
    for mail_address in person.get('emails') or []:
        person.add_email_address(mail_address)
    if 'lastName' in person:
        person['last_name'] = person['lastName']

    # The source keys are optional; drop them only when present.
    person.pop('lastName', None)
    person.pop('institutional_email', None)
    person.pop('emails', None)

    new_identifiers = []
    for identifier in person.get(pids.IDENTIFIERS_FIELD) or []:
        if identifier['idtype'] == 'noCi':
            new_identifiers.append({
                'idtype': 'dni',
                'value': 'dni:' + country_code + '.' + identifier['idvalue'],
            })
        elif identifier['idtype'] == 'idExpediente':
            new_identifiers.append({
                'idtype': 'hrid',
                'value': 'hrid:' + str(org.id) + '.' + identifier['idvalue'],
            })
        else:
            new_identifiers.append(identifier)
    person[pids.IDENTIFIERS_FIELD] = new_identifiers
    return person
# --- iroko/patents/cli.py ---------------------------------------------------

@click.group()
def patents():
    """Command related to patents iroko data."""


@patents.command()
@click.argument('patid')
@with_appcontext
def import_from_file(patid):
    """Load patents from data/patents/patents.json.

    ``patid`` is still required on the command line for compatibility, but it
    is not forwarded: ``PatentRecord.load_from_json_file`` only accepts the
    file path, so passing it made the command fail with a TypeError.
    """
    datadir = current_app.config['IROKO_DATA_DIRECTORY']
    file_path = os.path.join(datadir, 'patents', 'patents.json')
    PatentRecord.load_from_json_file(file_path)


# --- iroko/patents/ext.py ---------------------------------------------------

class IrokoPatents(object):
    """Flask extension that registers the ``patents`` CLI group."""

    def __init__(self, app=None):
        """Initialize the extension, binding it to ``app`` when given."""
        if app:
            self.init_app(app)

    def init_app(self, app):
        """Register the CLI commands and the extension on ``app``."""
        app.cli.add_command(patents)
        app.extensions['iroko-patents'] = self


# --- iroko/patents/fixtures.py ----------------------------------------------

# Lower-case, accent-free fragments that mark an affiliation as Cuban.
_CUBAN_PLACE_WORDS = (
    'cuba', 'pinar del rio', 'artemisa',
    'mayabeque', 'matanzas', 'habana',
    'cienfuegos', 'villa clara', 'santa clara',
    'santi spiritus', 'sancti spiritus', 'ciego de avila',
    'camaguey', 'las tunas', 'bayamo', 'holguin',
    'santiago de cuba', 'guantanamo',
)
_UNIVERSITY_WORDS = ('universidad', 'university')


def _fold_accents(text: str):
    """Return ``text`` lower-cased with diacritics removed.

    NFKD splits accented letters into base letter plus combining mark; the
    ASCII round trip then drops the marks, so 'Camagüey' becomes 'camaguey'.
    (NFC, used before, keeps the accents, so accented names never matched
    the accent-free keywords.)
    """
    return normalize('NFKD', text.lower()).encode('ascii', 'ignore').decode('ascii')


def _is_cuban_affiliation(affiliation: str):
    """Tell whether the affiliation text mentions Cuba or a Cuban place."""
    folded = _fold_accents(affiliation)
    return any(word in folded for word in _CUBAN_PLACE_WORDS)


def _is_university_affiliation(affiliation: str):
    """Tell whether the affiliation text mentions a university."""
    folded = _fold_accents(affiliation)
    return any(word in folded for word in _UNIVERSITY_WORDS)


def _creator_is_cuban(creator):
    """Tell whether any affiliation of ``creator`` is Cuban."""
    if 'affiliations' not in creator:
        return False
    return any(_is_cuban_affiliation(aff) for aff in creator['affiliations'])


def _creator_is_author(creator):
    """Tell whether ``creator`` has the 'Author' role."""
    if 'roles' not in creator:
        return False
    return any(role == 'Author' for role in creator['roles'])


def get_cuban_authors_from_record(rec: IrokoRecord):
    """Return the creators of ``rec`` that are authors with a Cuban affiliation."""
    if 'creators' not in rec:
        return []
    return [
        creator for creator in rec['creators']
        if _creator_is_author(creator) and _creator_is_cuban(creator)
    ]


def get_all_cubans_authors_from_records():
    """Scan every indexed record and collect its Cuban authors.

    :returns: ``(cubans, universities)``; both map an author name to the
        creator entry, the second only for authors with a university
        affiliation.
    """
    search = IrokoRecordSearch()
    cubans = dict()
    universities = dict()
    for hit in search.scan():
        record = IrokoRecord.get_record_by_pid_value(hit.id)
        for aut in get_cuban_authors_from_record(record):
            if 'name' not in aut or aut['name'] in cubans:
                continue
            cubans[aut['name']] = aut
            if any(_is_university_affiliation(aff) for aff in aut['affiliations']):
                universities[aut['name']] = aut
    return cubans, universities


def _tmp_func():
    """Debug helper: print each record that lowers the earliest publication date seen."""
    search = IrokoRecordSearch()
    last: str = '2022-12-31'
    for hit in search.scan():
        record = IrokoRecord.get_record_by_pid_value(hit.id)
        # Records without a publication date cannot be compared; skip them.
        cur = record.get('publication_date')
        if cur and last > cur:
            last = cur
            print('---------------------')
            print('---------------------')
            print(last)
            print(record)
            print('---------------------')
            print('---------------------')
def allowed_file(filename):
    """Tell whether ``filename`` has an extension accepted for upload (csv or json)."""
    ALLOWED_EXTENSIONS = {'csv', 'json'}
    return get_ext(filename) in ALLOWED_EXTENSIONS


def get_ext(filename):
    """Return the lower-cased text after the last dot of ``filename``.

    Returns an empty string when the name contains no dot (this used to
    raise IndexError).
    """
    _, dot, extension = filename.rpartition('.')
    return extension.lower() if dot else ''


def csv_to_json(file, data_dir='./data'):
    """Save an uploaded file under ``data_dir`` and convert it to JSON records.

    :param file: upload object exposing ``filename`` and ``save(path)``.
    :param data_dir: directory receiving both files; created when missing.
    :returns: path of the JSON file, named after the current timestamp.
    """
    stamp = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    extension = get_ext(file.filename)
    os.makedirs(data_dir, exist_ok=True)
    upload_path = os.path.join(data_dir, stamp + '.' + extension)
    file.save(upload_path)
    if extension == 'json':
        # Already JSON: parsing it as CSV would overwrite the upload (source
        # and target share the same path) with garbage.
        return upload_path
    json_path = os.path.join(data_dir, stamp + '.json')
    read_csv(upload_path).to_json(path_or_buf=json_path, orient='records')
    return json_path
website = 'https://wiposearch.ocpi.cu/wopublish-search/public/patents'

# Template of one scraped patent; getData copies it for every table row.
patent = {
    "identifiers": "",
    "title": "",
    "authors": [],
    "affiliations": [],
    "country": "",
    "language": "",
    "creation_date": "",
    "grant_date": "",
    "publication_date": "",
    "legal_status": "",
}


def getData(url):
    """Download one result page and print each table row as a JSON patent.

    :param url: address of the result page.
    :returns: the parsed page (BeautifulSoup object).
    :raises: the ``requests`` error for a failed or non-2xx request, and
        AttributeError when the page has no results table.
    """
    # Without a timeout a stalled server would block the scraper forever.
    resultado = requests.get(url, timeout=30)
    resultado.raise_for_status()
    soup = bs.BeautifulSoup(resultado.text, 'html.parser')
    rows = soup.find('table', {'class': 'table table-view COLUMN'}).find('tbody').find_all('tr')
    for row in rows:
        cells = [cell.get_text().rstrip() for cell in row.find_all('td')]
        if len(cells) < 12:
            # Not a data row (the last column read is index 11).
            continue
        row_patent = dict(
            patent,
            identifiers=cells[2],
            title=cells[1],
            authors=cells[9],
            affiliations=cells[8],
            country="Cuba",
            language="spanish",
            creation_date=cells[3],
            grant_date=cells[5],
            publication_date=cells[6],
            legal_status=cells[11],
        )
        print(json.dumps(row_patent))
    return soup


def nextPage(soup):
    """Return the href of the element with id ``id14`` without its ``;...`` path part.

    Everything from the first ';' of the path up to the query string is
    removed. Returns None when the element or its href is missing.
    """
    link = soup.find(attrs={'id': 'id14'})
    href = link.get('href') if link is not None else None
    if not href:
        return None
    path, question_mark, query = href.partition('?')
    return path.split(';', 1)[0] + question_mark + query


def pagination(url):
    """Scrape ``url`` and every following page until no next page is found.

    Stops on the first error, when there is no next link, or when a page
    address repeats. Always returns ``'ok'``.
    """
    from urllib.parse import urljoin

    visited = set()
    while url and url not in visited:
        visited.add(url)
        try:
            siguiente = nextPage(getData(url))
        except Exception as error:
            # Best effort: a failing page ends the crawl, but say why.
            print('pagination stopped at {0}: {1!r}'.format(url, error))
            break
        # Next links may be relative to the current page.
        url = urljoin(url, siguiente) if siguiente else None
    return 'ok'


if __name__ == '__main__':
    # Only scrape when run as a script, not when the module is imported.
    print(pagination(website))
Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + + +"""JSON schemas. + +JSON schemas are used to define the structure of the record data of your +instance. + +The documentation of +`Invenio-JSONSchemas `_ +describes their usage and configuration options. +""" diff --git a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json new file mode 100644 index 00000000..67c7e510 --- /dev/null +++ b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json @@ -0,0 +1,176 @@ +{ + "$schema": "http://json-schema.org/draft-04/schema#", + "id": "http://localhost/schemas/patents/patent-v1.0.0.json", + "title": "Patent Schema", + "type": "object", + "additionalProperties": true, + "required": [ + "id", + "identifiers" + ], + "properties": { + "id": { + "type": "string", + "description": "Iroko UUID, pid_type = patid" + }, + "identifiers": { + "type": "array", + "description": "Patent Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + }, + "title": { + "type": "string", + "description": "The title of the patent." 
+ }, + "authors": { + "type": "array", + "description": "A list with the inventors of the patent", + "minItems": 0, + "items":{ + "type": "object", + "properties": { + "name": { + "description": "The name of the author", + "type": "string" + }, + "identifiers": { + "type": "array", + "description": "Person Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + } + } + } + }, + "affiliations": { + "type": "array", + "description": "A list with the affiliations of the patent", + "minItems": 0, + "items":{ + "type": "object", + "properties": { + "name": { + "description": "The name of the affiliation", + "type": "string" + }, + "identifiers": { + "type": "array", + "description": "Person Identifiers", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "idtype": { + "description": "identifier type", + "type": "string" + }, + "value": { + "type": "string" + } + } + } + } + } + } + }, + "summary": { + "type": "string", + "description": "A summary of the patent" + }, + "classification": { + "type": "string", + "description": "The classification of the patent" + }, + "claims": { + "type": "string", + "description": "An url to the file with the claims of the patent which define in technical terms why protection for the invention is requested" + }, + "prior_art": { + "type": "string", + "description": "An url to the file with the background and description of the problem" + }, + "drawing": { + "type": "string", + "description": "An url with a drawing with the design of what you want to patent " + }, + "countries": { + "type": "array", + "description": "A list of the countries where the inventor wants to patent the product", + "items": { + "type": "string" + } + }, + "country": { + "type": "object", + "description": "The country where originally the patent was presented", + 
"properties": { + "code": { + "type": "string", + "description": "The ISO 3166-1 alpha-2 code of the country" + }, + "name": { + "type": "string", + "description": "The name of the country" + } + } + }, + "language": { + "type": "string", + "description": "The language of the patent" + }, + "key_words": { + "type": "array", + "description": "A list of key words of the patent", + "items": { + "type": "string" + } + }, + "creation_date": { + "type": "string", + "description": "The date the patent was created", + "format": "date-time" + }, + "grant_date": { + "type": "string", + "description": "The date the patent was granted", + "format": "date-time" + }, + "publication_date": { + "type": "string", + "description": "The date the patent was published", + "format": "date-time" + }, + "link": { + "type": "string", + "description": "The link of the patent in Google patents", + "format": "date-time" + }, + "legal_status": { + "type": "string", + "description": "The legal status of the patent" + } + } + } diff --git a/iroko/patents/loaders/__init__.py b/iroko/patents/loaders/__init__.py new file mode 100644 index 00000000..4611d81e --- /dev/null +++ b/iroko/patents/loaders/__init__.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Loaders. + +This file contains sample loaders that can be used to deserialize input data in +an application level data structure. The marshmallow_loader() method can be +parameterized with different schemas for the record metadata. In the provided +json_v1 instance, it uses the MetadataSchemaV1, defining the +PersistentIdentifier field. 
+""" + +from __future__ import absolute_import, print_function + +from invenio_records_rest.loaders.marshmallow import marshmallow_loader + +from iroko.patents.marshmallow import PatentRecordSchemaV1 + +#: JSON loader using Marshmallow for data validation. +json_v1 = marshmallow_loader(PatentRecordSchemaV1) + +__all__ = ( + 'json_v1', +) diff --git a/iroko/patents/mappings/__init__.py b/iroko/patents/mappings/__init__.py new file mode 100644 index 00000000..b94e34a2 --- /dev/null +++ b/iroko/patents/mappings/__init__.py @@ -0,0 +1,21 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Mappings. + +Mappings define how patents and their fields will be indexed in Elasticsearch. +The provided patent-v1.0.0.json file is an example of how to index patents +in Elasticsearch. You need to provide one mapping per major version of +Elasticsearch you want to support. +""" + +from __future__ import absolute_import, print_function diff --git a/iroko/patents/mappings/v6/__init__.py b/iroko/patents/mappings/v6/__init__.py new file mode 100644 index 00000000..c4d43bd7 --- /dev/null +++ b/iroko/patents/mappings/v6/__init__.py @@ -0,0 +1,15 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details.
+ +"""Mappings for Elasticsearch 6.x.""" + +from __future__ import absolute_import, print_function diff --git a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json new file mode 100644 index 00000000..6a3ac3d0 --- /dev/null +++ b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json @@ -0,0 +1,125 @@ +{ + "mappings":{ + "patent-v1.0.0": { + "date_detection": false, + "numeric_detection": false, + "properties": { + "$schema": { + "type": "text", + "index": false + }, + "id": { + "type": "keyword" + }, + "identifiers": { + "type": "object", + "properties": { + "idtype": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "title": { + "type": "text", + "analyzer": "spanish" + }, + "authors": { + "type": "object", + "properties":{ + "identifiers": { + "type": "object", + "properties": { + "idtype": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "name": { + "type": "keyword" + } + } + }, + "affiliations": { + "type": "object", + "properties":{ + "identifiers": { + "type": "object", + "properties": { + "idtype": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "name": { + "type": "keyword" + } + } + }, + "summary": { + "type": "text", + "analyzer": "spanish" + }, + "classification": { + "type": "text", + "analyzer": "spanish" + }, + "claims": { + "type": "keyword" + }, + "prior_art": { + "type": "keyword" + }, + "drawing": { + "type": "keyword" + }, + "countries": { + "type": "keyword" + }, + "country": { + "type": "object", + "properties": { + "code": { + "type": "keyword" + }, + "name": { + "type": "keyword" + } + } + }, + "language": { + "type": "keyword" + }, + "key_words": { + "type": "keyword" + }, + "creation_date": { + "type": "date", + "format": "date" + }, + "grant_date": { + "type": "date", + "format": "date" + }, + "publication_date": { + "type": "date", + "format": "date" + }, + "link": { + "type": "keyword" + }, + 
"legal_status": { + "type": "keyword" + } + } + } + } +} diff --git a/iroko/patents/marshmallow/__init__.py b/iroko/patents/marshmallow/__init__.py new file mode 100644 index 00000000..72fa19d5 --- /dev/null +++ b/iroko/patents/marshmallow/__init__.py @@ -0,0 +1,20 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. + +"""Schemas for marshmallow.""" + +from __future__ import absolute_import, print_function + +from .json import PatentRecordSchemaV1 + +__all__ = ( +'PatentRecordSchemaV1') diff --git a/iroko/patents/marshmallow/json.py b/iroko/patents/marshmallow/json.py new file mode 100644 index 00000000..c49f0c81 --- /dev/null +++ b/iroko/patents/marshmallow/json.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +# Copyright (c) 2022. Universidad de Pinar del Rio +# This file is part of SCEIBA (sceiba.cu). +# SCEIBA is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +# +# +# Iroko is free software; you can redistribute it and/or modify it under the +# terms of the MIT License; see LICENSE file for more details. 
+
+"""JSON Schemas."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_jsonschemas import current_jsonschemas
+from invenio_records_rest.schemas import Nested, StrictKeysMixin
+from invenio_records_rest.schemas.fields import (
+    DateString, GenFunction,
+    PersistentIdentifier, SanitizedUnicode,
+    )
+from marshmallow import INCLUDE, fields, missing, validate
+
+allow_empty = validate.Length(min=0)
+
+
+def bucket_from_context(_, context):
+    """Get the record's bucket from context."""
+    record = (context or {}).get('record', {})
+    return record.get('_bucket', missing)
+
+
+def files_from_context(_, context):
+    """Get the record's files from context."""
+    record = (context or {}).get('record', {})
+    return record.get('_files', missing)
+
+
+def schema_from_context(_, context):
+    """Get the record's schema from context."""
+    # Imported here: the module never imported PatentRecord (NameError).
+    from iroko.patents.api import PatentRecord
+    record = (context or {}).get('record', {})
+    default = current_jsonschemas.path_to_url(PatentRecord._schema)
+    return record.get("_schema", default)
+
+
+class IdentifierSchemaV1(StrictKeysMixin):
+    """Ids schema."""
+
+    idtype = SanitizedUnicode()
+    value = SanitizedUnicode()
+
+
+class CountrySchemaV1(StrictKeysMixin):
+    name = SanitizedUnicode()
+    code = SanitizedUnicode()
+
+
+class AffiliationsSchemaV1(StrictKeysMixin):
+    id = SanitizedUnicode()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    name = SanitizedUnicode()
+
+class PersonSchemaV1(StrictKeysMixin):
+    id = SanitizedUnicode()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    name = SanitizedUnicode()
+
+
+class PatentMetadataSchemaV1(StrictKeysMixin):
+    """Schema for the record metadata."""
+
+    id = PersistentIdentifier()
+    identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+    title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
+    authors = Nested(PersonSchemaV1, many=True)
+    affiliations = Nested(AffiliationsSchemaV1, many=True)
+    summary = SanitizedUnicode()
+    classification = SanitizedUnicode()
+    claims = SanitizedUnicode()
+    prior_art = SanitizedUnicode()
+    drawing = SanitizedUnicode()
+    countries = fields.List(SanitizedUnicode())
+    country = Nested(CountrySchemaV1, many=False)
+    language = SanitizedUnicode()
+    key_words = fields.List(SanitizedUnicode())
+    creation_date = DateString()
+    grant_date = DateString()
+    publication_date = DateString()
+    link = SanitizedUnicode()
+    legal_status = SanitizedUnicode()
+    _schema = GenFunction(
+        attribute="$schema",
+        data_key="$schema",
+        deserialize=schema_from_context,  # to be added only when loading
+        )
+
+
+class PatentRecordSchemaV1(StrictKeysMixin):
+    """Record schema."""
+
+    metadata = fields.Nested(PatentMetadataSchemaV1)
+    created = fields.Str(dump_only=True)
+    revision = fields.Integer(dump_only=True)
+    updated = fields.Str(dump_only=True)
+    links = fields.Dict(dump_only=True)
+    id = PersistentIdentifier()
+    files = GenFunction(
+        serialize=files_from_context, deserialize=files_from_context)
+
+patentMetadataSchema = PatentMetadataSchemaV1(many=False, unknown=INCLUDE)
diff --git a/iroko/patents/permissions.py b/iroko/patents/permissions.py
new file mode 100644
index 00000000..112e6442
--- /dev/null
+++ b/iroko/patents/permissions.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+ +"""Permissions for Iroko.""" +from flask_login import current_user +from flask_principal import RoleNeed +from invenio_access import Permission + +curator_permission = Permission(RoleNeed('curator')) +admin_permission = Permission(RoleNeed('admin')) + +def can_edit_patent_factory(record, *args, **kwargs): + """Checks if logged user can update or delete patent items. + """ + def can(self): + if current_user.is_authenticated and curator_permission.can(): + return True + return False + return type('Check', (), {'can': can})() diff --git a/iroko/patents/register/marshmallow.py b/iroko/patents/register/marshmallow.py new file mode 100644 index 00000000..19db3149 --- /dev/null +++ b/iroko/patents/register/marshmallow.py @@ -0,0 +1,25 @@ +from marshmallow import Schema, fields, post_load + +from iroko.evaluations.models import EvaluationState + + +class RegisterSchema(Schema): + + id = fields.Int() + userEmail = fields.Str(required=False, allow_none=True) + date = fields.DateTime() + patents = fields.Int() + + @post_load + def register_load(self, item, **kwargs): + item['userEmail'] = item['userEmail'] if 'userEmail' in item else '' + item['date'] = item['date'] if 'date' in item else '' + item['patents'] = item['patents'] if 'patents' in item else '' + return item + + +register_schema_many = RegisterSchema( + many=True, only=( + 'id', 'userEmail', 'date', 'patents') + ) +register_schema = RegisterSchema(many=False) diff --git a/iroko/patents/register/model.py b/iroko/patents/register/model.py new file mode 100644 index 00000000..bcb24f71 --- /dev/null +++ b/iroko/patents/register/model.py @@ -0,0 +1,15 @@ +from invenio_accounts.models import User +from invenio_db import db +from sqlalchemy_utils.types import JSONType, UUIDType + +class Register(db.Model): + + __tablename__ = 'iroko_register' + + id = db.Column(db.Integer, primary_key=True) + userEmail = db.Column(db.String) + date = db.Column(db.DateTime, nullable=False) + patents = db.Column(db.Integer) + + 
#instancia del registro + data = db.Column(JSONType) diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py new file mode 100644 index 00000000..76d45e4b --- /dev/null +++ b/iroko/patents/rest.py @@ -0,0 +1,232 @@ + + + +from __future__ import absolute_import, print_function + +from datetime import datetime, date +import json +import os + +from flask import Blueprint, flash, jsonify, make_response, request +from elasticsearch.exceptions import NotFoundError +from invenio_pidstore.resolver import Resolver +from invenio_pidstore.models import PersistentIdentifier +from invenio_indexer.api import RecordIndexer +from flask_login import current_user +from invenio_oauth2server import require_api_auth +from invenio_db import db +from iroko.utils import remove_nulls +from flask_principal import RoleNeed +from invenio_access import Permission + +from iroko.api import IrokoBaseRecord +from iroko.patents.register.model import Register +from iroko.patents.register.marshmallow import register_schema, register_schema_many +from iroko.patents.api import PatentRecord +from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext +from iroko.patents.serializers import json_v1_response +from iroko.pidstore import pids +from iroko.utils import IrokoResponseStatus, iroko_json_response +from iroko.pidstore.pids import ( + IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas, + ) + +api_blueprint = Blueprint( + 'iroko_api_patents', + __name__, + url_prefix='/patents' + ) + + +@api_blueprint.route('/pid', methods=['GET']) +def get_patent_by_pid_canonical(): + """ + Get a source by any PID received as an argument, including UUID + this method gives the directed organization with that pid, even if is obsolete or redirected status + """ + try: + _id = request.args.get('value') + print("**********************", _id) + pid, patent = PatentRecord.get_pat_by_pid(pids.PATENT_PID_TYPE, _id) + if not pid or not patent: + raise Exception('') + + return 
json_v1_response(pid, patent)
+
+    except Exception as e:
+        return jsonify({
+            'ERROR': 'no pid found'.format(_id)
+            })
+
+
+@api_blueprint.route('/import', methods=['POST'])
+def upload_file():
+    msg = None  # stays None when the payload holds no patents
+    try:
+        if not request.is_json:
+            raise Exception("No JSON data provided")
+        input_data = request.json
+        print('=======================', input_data)
+        for data in input_data:
+            if 'assignee' in data:
+                patent = PatentRecord.fix_gp_imported(data)
+            else:
+                patent = PatentRecord.fix_patents_imported(data)
+            patentRecord, msg = PatentRecord.resolve_and_update(data = patent)
+            print('aaaaaaaaaaa',patentRecord)
+            if not patentRecord:
+                print("no pids found, creating patent")
+                patentRecord = PatentRecord.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE)
+                msg = 'created'
+    except Exception as e:
+        return jsonify({
+            'ERROR HOLA': str(e),
+            })
+
+    return jsonify({
+        'SUCCES':"Patentes creadas",
+        'message':msg,
+        })
+
+
+@api_blueprint.route('/<uuid>/edit', methods=['POST'])
+def edit_patent(uuid):
+    """
+    Update the data of the patent identified by the given uuid.
+    """
+    try:
+        if not request.is_json:
+            raise Exception("No se especifican datos en formato json para la curacion")
+        input_data = request.json
+        print(input_data)
+        # org = org_json_v1.transform_record(input_data["id"], input_data)
+
+        pat, msg = PatentRecord.resolve_and_update(uuid, input_data)
+
+        if not pat:
+            raise Exception("No se encontro record de patente")
+
+        print("entra a la api de editar patentes...........................................")
+        return jsonify({
+            'SUCCES':"Patente modificada",
+            'message':msg,
+            'org':pat
+            })
+    except Exception as e:
+        print(e)
+        return jsonify({
+            'ERROR': str(e),
+            })
+
+@api_blueprint.route('/new', methods=['POST'])
+def create_patent():
+    try:
+        if not request.is_json:
+            raise Exception("No JSON data provided")
+
+        input_data = request.json
+        id = input_data['identifiers'][0]['value']
+        pid, patent = PatentRecord.get_pat_by_pid(id)
+        print('PID',pid)
+
+
+        if pid:
+            raise
Exception("Patente existente")
+
+        pat= PatentRecord.create(input_data, iroko_pid_type=pids.PATENT_PID_TYPE)
+        msg = 'ok'
+
+        print('PAT',pat)
+
+        return jsonify({
+            'SUCCES':"Patente creada",
+            'message':msg,
+            'pat':pat
+            })
+
+    except Exception as e:
+        return jsonify({
+            'ERROR': str(e),
+            })
+
+@api_blueprint.route('/delete/<uuid>', methods=['DELETE'])
+def delete_patent(uuid):
+    """Delete the record whose PID value is *uuid* and drop it from the index."""
+    record = IrokoBaseRecord.get_record_by_pid_value(uuid)
+
+    if not record:
+        raise Exception("No se encontro record de patente")
+
+    result = super(IrokoBaseRecord, record).delete(force=False)
+    db.session.commit()
+    # if delindex:
+    try:
+        RecordIndexer().delete(record)
+        db.session.commit()
+    except NotFoundError:
+        pass
+
+
+    return result
+
+@api_blueprint.route('/register', methods=['GET'])
+def get_register():
+    try:
+        count = int(request.args.get('size')) if request.args.get('size') else 10
+        page = int(request.args.get('page')) if request.args.get('page') else 1
+
+        if page < 1:
+            page = 1
+        offset = count * (page - 1)
+        limit = offset + count
+
+        result = Register.query.all()
+        total = len(result)
+
+        return iroko_json_response(
+            IrokoResponseStatus.SUCCESS, \
+            'ok', 'register', \
+            {
+                'data': register_schema_many.dump(result[offset:limit]),
+                'total': total
+            }
+            )
+
+    except Exception as e:
+        msg = str(e)
+        return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None)
+
+@api_blueprint.route('/register/new', methods=['POST'])
+def create_register():
+    try:
+        input_data = request.json
+        register = Register()
+        register.data = input_data
+        register.userEmail = input_data.get("userEmail")
+        register.date = input_data.get("date")
+        register.patents = input_data.get("patents")
+
+        db.session.add(register)
+        db.session.commit()
+
+        msg = "New Register Created"
+
+    except Exception as e:
+        msg = str(e)
+        return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None)
+
+    return iroko_json_response(
+        IrokoResponseStatus.SUCCESS, \
+        msg, 'register', \
+
register_schema.dump(register),
+        )
+
+@api_blueprint.route('/register/delete/<int:id>', methods=['DELETE'])
+def delete_register(id):
+    register = Register.query.filter_by(id = id).delete()
+    db.session.commit()
+
+    return make_response("Eliminado", 204)
+
+
+
diff --git a/iroko/patents/search.py b/iroko/patents/search.py
new file mode 100644
index 00000000..d81931f1
--- /dev/null
+++ b/iroko/patents/search.py
@@ -0,0 +1,19 @@
+
+"""Patent search APIs."""
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+from invenio_search import RecordsSearch
+
+
+class PatentsSearch(RecordsSearch):
+    """RecordsSearch for patents."""
+
+    class Meta:
+        """Search only on patents index."""
+
+        index = "patents"
+        doc_types = None
diff --git a/iroko/patents/serializers/__init__.py b/iroko/patents/serializers/__init__.py
new file mode 100644
index 00000000..a664f724
--- /dev/null
+++ b/iroko/patents/serializers/__init__.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Record serializers."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_records_rest.serializers.json import JSONSerializer
+from invenio_records_rest.serializers.response import (
+    record_responsify,
+    search_responsify,
+    )
+
+# Serializers
+# ===========
+#: JSON serializer definition.
+from iroko.patents.marshmallow.json import PatentRecordSchemaV1 + +json_v1 = JSONSerializer(PatentRecordSchemaV1, replace_refs=True) + +# Records-REST serializers +# ======================== +#: JSON record serializer for individual organizations. +json_v1_response = record_responsify(json_v1, 'application/json') +#: JSON record serializer for search results. +json_v1_search = search_responsify(json_v1, 'application/json') + +__all__ = ( + 'json_v1', + 'json_v1_response', + 'json_v1_search', +) diff --git a/iroko/patents/utils.py b/iroko/patents/utils.py new file mode 100644 index 00000000..61efa26e --- /dev/null +++ b/iroko/patents/utils.py @@ -0,0 +1,65 @@ + + +from lxml import etree + +from iroko.records import ContributorRole + + +def get_people_from_nlm(metadata: etree._Element): + """get a PersonRecord from {http://dtd.nlm.nih.gov/publishing/2.3}contrib + etree._Element + return creators, contribs dics, """ + + xmlns = '{http://dtd.nlm.nih.gov/publishing/2.3}' + contribs_xml = metadata.findall('.//' + xmlns + 'contrib') + + contributors = {} + + for contrib in contribs_xml: + person = dict() + + surname = contrib.find(xmlns + 'name/' + xmlns + 'surname') + given_names = contrib.find(xmlns + 'name/' + xmlns + 'given-names') + aff = contrib.find(xmlns + 'aff') + email = contrib.find(xmlns + 'email') + if given_names is None and surname is None: + # FIXME if a person dont have surname or given name, then is not a person.... + # even if there is an email? 
+ continue + else: + name = "" + if given_names is not None and given_names.text is not None: + name += given_names.text + if surname is not None and surname.text is not None: + name += ' ' + surname.text + person['name'] = name + if aff is not None: + person['affiliations'] = [] + person['affiliations'].append(aff.text) + if email is not None: + person['email'] = email.text + person['roles'] = [] + if 'corresp' in contrib.attrib: + if contrib.attrib['corresp'] == "yes": + person['roles'].append(ContributorRole.ContactPerson.value) + if 'contrib-type' in contrib.attrib: + ctype = contrib.attrib['contrib-type'] + if ctype == "author": + person['roles'].append(ContributorRole.Author.value) + if ctype == "editor": + person['roles'].append(ContributorRole.Editor.value) + if ctype == "jmanager": + person['roles'].append(ContributorRole.JournalManager.value) + if person['name'] in contributors.keys(): + contributors[person['name']]['roles'].extend(person['roles']) + else: + contributors[person['name']] = person + creators = [] + contribs = [] + for name in contributors: + person = contributors[name] + if ContributorRole.Author.value in person['roles']: + creators.append(person) + else: + contribs.append(person) + return creators, contribs diff --git a/iroko/pidstore/fetchers.py b/iroko/pidstore/fetchers.py index 57dfdcc5..043b76e8 100755 --- a/iroko/pidstore/fetchers.py +++ b/iroko/pidstore/fetchers.py @@ -73,6 +73,13 @@ def person_uuid_fetcher(per_uuid, data): pid_value=str(data[pids.IROKO_UUID_FIELD]), ) +def patent_uuid_fetcher(per_uuid, data): + return FetchedPID( + provider=providers.PatentUUIDProvider, + pid_type=providers.PatentUUIDProvider.pid_type, + pid_value=str(data[pids.IROKO_UUID_FIELD]), + ) + def identifiers_fetcher(record_uuid, data, pid_type): assert data, "no data" diff --git a/iroko/pidstore/minters.py b/iroko/pidstore/minters.py index 0c3ebbd8..822acd95 100755 --- a/iroko/pidstore/minters.py +++ b/iroko/pidstore/minters.py @@ -74,14 +74,21 @@ def 
organization_uuid_minter(org_uuid, data): return provider.pid -def person_uuid_minter(org_uuid, data): +def person_uuid_minter(person_uuid, data): provider = providers.PersonUUIDProvider.create( object_type=pids.IROKO_OBJECT_TYPE, - object_uuid=org_uuid, + object_uuid=person_uuid, data=data ) return provider.pid +def patent_uuid_minter(patent_uuid, data): + provider = providers.PatentUUIDProvider.create( + object_type=pids.IROKO_OBJECT_TYPE, + object_uuid=patent_uuid, + data=data + ) + return provider.pid def identifiers_minter(uuid, data, object_type): prsIDs = providers.IdentifiersProvider.create_identifiers( diff --git a/iroko/pidstore/pids.py b/iroko/pidstore/pids.py index 6652991a..fde5eee1 100644 --- a/iroko/pidstore/pids.py +++ b/iroko/pidstore/pids.py @@ -35,6 +35,10 @@ PERSON_PID_MINTER = "perid" PERSON_PID_FETCHER = "perid" +PATENT_PID_TYPE = "patid" +PATENT_PID_MINTER = "patid" +PATENT_PID_FETCHER = "patid" + IROKO_OBJECT_TYPE = "rec" IROKO_UUID_FIELD = "id" @@ -42,7 +46,8 @@ 'irouid', 'srcid', 'orgid', - 'perid' + 'perid', + 'patid' ] def get_pid_by_data(data): diff --git a/iroko/pidstore/providers.py b/iroko/pidstore/providers.py index 7fd1fecf..e6ccadad 100755 --- a/iroko/pidstore/providers.py +++ b/iroko/pidstore/providers.py @@ -265,6 +265,43 @@ def create(cls, pid_type=None, pid_value=None, object_type=None, ) +class PatentUUIDProvider(BaseProvider): + """Document identifier provider.""" + + pid_type = pids.PATENT_PID_TYPE + """Type of persistent identifier.""" + + pid_provider = None + """Provider name. + The provider name is not recorded in the PID since the provider does not + provide any additional features besides creation of record ids. + """ + + default_status = PIDStatus.REGISTERED + """Record IDs are by default registered immediately. 
+    Default: :attr:`invenio_pidstore.models.PIDStatus.REGISTERED`
+    """
+
+    object_type = pids.IROKO_OBJECT_TYPE
+
+    @classmethod
+    def create(cls, pid_type=None, pid_value=None, object_type=None,
+               object_uuid=None, **kwargs):
+        """Create a new record identifier from the deposit PID value."""
+        pid_type = pid_type or cls.pid_type
+        pid_value = pid_value or uuid.uuid4()
+        object_type = object_type or cls.object_type
+        object_uuid = object_uuid or uuid.uuid4()
+        kwargs.setdefault('status', cls.default_status)
+        return super(PatentUUIDProvider, cls).create(
+            pid_type=pid_type,
+            pid_value=pid_value,
+            object_type=object_type,
+            object_uuid=object_uuid,
+            **kwargs
+            )
+
+
 class IdentifiersProvider(BaseProvider):
 
     default_status = PIDStatus.REGISTERED
diff --git a/pyproject.toml b/pyproject.toml
index 0c35b9c4..3f18ad3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,6 +105,7 @@ iroko_harvester = "iroko.harvester.ext:IrokoHarvester"
 invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles"
 iroko_organizations = "iroko.organizations.ext:IrokoOrganizations"
 iroko_persons = "iroko.persons.ext:IrokoPersons"
+iroko_patents = "iroko.patents.ext:IrokoPatents"
 
 [tool.poetry.plugins."invenio_base.blueprints"]
 iroko = "iroko.theme.views:blueprint"
@@ -130,18 +131,23 @@ iroko = "iroko.records:iroko"
 invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles"
 iroko_organizations = "iroko.organizations:IrokoOrganizations"
 iroko_persons = "iroko.persons:IrokoPersons"
+iroko_patents = "iroko.patents:IrokoPatents"
 
 [tool.poetry.plugins."invenio_jsonschemas.schemas"]
 iroko = "iroko.records.jsonschemas"
 sources = "iroko.sources.schemas"
 organizations = "iroko.organizations.jsonschemas"
 persons = "iroko.persons.jsonschemas"
+patents = "iroko.patents.jsonschemas"
+
 
 [tool.poetry.plugins."invenio_search.mappings"]
 records = "iroko.records.mappings"
 sources = "iroko.sources.mappings"
 organizations = "iroko.organizations.mappings"
 persons = "iroko.persons.mappings"
+patents = "iroko.patents.mappings"
+
 
 [tool.poetry.plugins."invenio_admin.views"]
 vocabulary_admin = "iroko.vocabularies.admin:vocabularies_adminview"
@@ -162,6 +168,7 @@ iroko_sources = "iroko.sources.models"
 iroko_harvester = "iroko.harvester.models"
 invenio_userprofiles = "iroko.userprofiles.models"
 iroko_evaluations = "iroko.evaluations.models"
+iroko_register = "iroko.patents.register.model"
 
 [tool.poetry.plugins."invenio_base.api_blueprints"]
 iroko_taxonomy = "iroko.vocabularies.rest:api_blueprint"
@@ -173,6 +180,9 @@ invenio_userprofiles = "iroko.userprofiles.rest:api_blueprint"
 iroko_records = "iroko.records.rest:api_blueprint"
 iroko_organizations = "iroko.organizations.rest:api_blueprint"
 iroko_persons = "iroko.persons.rest:api_blueprint"
+iroko_patents = "iroko.patents.rest:api_blueprint"
+iroko_register = "iroko.patents.rest:api_blueprint"
+
 iroko_evaluations = "iroko.evaluations.rest:api_blueprint"
 
 [tool.poetry.plugins."invenio_celery.tasks"]
@@ -185,6 +195,7 @@ recoai = "iroko.pidstore.fetchers:iroko_source_oai_fetcher"
 srcid = "iroko.pidstore.fetchers:iroko_source_uuid_fetcher"
 orgid = "iroko.pidstore.fetchers:organization_uuid_fetcher"
 perid = "iroko.pidstore.fetchers:person_uuid_fetcher"
+patid = "iroko.pidstore.fetchers:patent_uuid_fetcher"
 
 [tool.poetry.plugins."invenio_pidstore.minters"]
 irouid = "iroko.pidstore.minters:iroko_uuid_minter"
@@ -193,6 +204,7 @@ recoai = "iroko.pidstore.minters:iroko_source_oai_minter"
 srcid = "iroko.pidstore.minters:iroko_source_uuid_minter"
 orgid = "iroko.pidstore.minters:organization_uuid_minter"
 perid = "iroko.pidstore.minters:person_uuid_minter"
+patid = "iroko.pidstore.minters:patent_uuid_minter"
 
 [tool.poetry.plugins."invenio_db.alembic"]
 invenio_userprofiles = "iroko.userprofiles:alembic"
diff --git a/run b/run
index 25506955..9415b3d4 100755
--- a/run
+++ b/run
@@ -18,7 +18,6 @@ export FLASK_ENV=development
 invenio run \
        --cert "$script_path"/docker/nginx/test.crt \
        --key "$script_path"/docker/nginx/test.key\
-       --host "10.16.64.222" \
        & pid_server=$!
 
 #trap 'kill $pid_celery $pid_server &>/dev/null' EXIT
diff --git a/setup.py b/setup.py
index 9ebb84eb..6287ea09 100644
--- a/setup.py
+++ b/setup.py
@@ -142,6 +142,8 @@
             'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles',
             'iroko_organizations = iroko.organizations.ext:IrokoOrganizations',
             'iroko_persons = iroko.persons.ext:IrokoPersons',
+            'iroko_patents = iroko.patents.ext:IrokoPatents',
+
         ],
         'invenio_base.blueprints': [
             'iroko = iroko.theme.views:blueprint',
@@ -168,18 +170,21 @@
             'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles',
             'iroko_organizations = iroko.organizations:IrokoOrganizations',
             'iroko_persons = iroko.persons:IrokoPersons',
+            'iroko_patents = iroko.patents:IrokoPatents',
         ],
         'invenio_jsonschemas.schemas': [
             'iroko = iroko.records.jsonschemas',
             'sources = iroko.sources.schemas',
             'organizations = iroko.organizations.jsonschemas',
             'persons = iroko.persons.jsonschemas',
+            'patents = iroko.patents.jsonschemas',
         ],
         'invenio_search.mappings': [
             'records = iroko.records.mappings',
             'sources = iroko.sources.mappings',
             'organizations = iroko.organizations.mappings',
             'persons = iroko.persons.mappings',
+            'patents = iroko.patents.mappings',
         ],
         'invenio_admin.views': [
             'vocabulary_admin = iroko.vocabularies.admin:vocabularies_adminview',
@@ -203,6 +208,7 @@
             'iroko_harvester = iroko.harvester.models',
             'invenio_userprofiles = iroko.userprofiles.models',
             'iroko_evaluations = iroko.evaluations.models',
+            'iroko_register = iroko.patents.register.model',
         ],
         'invenio_base.api_blueprints': [
             'iroko_taxonomy = iroko.vocabularies.rest:api_blueprint',
@@ -214,7 +220,9 @@
             'iroko_records = iroko.records.rest:api_blueprint',
             'iroko_organizations = iroko.organizations.rest:api_blueprint',
             'iroko_persons = iroko.persons.rest:api_blueprint',
+            'iroko_patents = iroko.patents.rest:api_blueprint',
             'iroko_evaluations = iroko.evaluations.rest:api_blueprint',
+            'iroko_register =
iroko.patents.rest:api_blueprint',
         ],
         'invenio_celery.tasks': [
             'iroko_harvester = iroko.harvester.tasks'
@@ -226,6 +234,7 @@
             'srcid = iroko.pidstore.fetchers:iroko_source_uuid_fetcher',
             'orgid = iroko.pidstore.fetchers:organization_uuid_fetcher',
             'perid = iroko.pidstore.fetchers:person_uuid_fetcher',
+            'patid = iroko.pidstore.fetchers:patent_uuid_fetcher',
         ],
         'invenio_pidstore.minters': [
             'irouid = iroko.pidstore.minters:iroko_uuid_minter',
@@ -234,6 +243,7 @@
             'srcid = iroko.pidstore.minters:iroko_source_uuid_minter',
             'orgid = iroko.pidstore.minters:organization_uuid_minter',
             'perid = iroko.pidstore.minters:person_uuid_minter',
+            'patid = iroko.pidstore.minters:patent_uuid_minter',
         ],
         'invenio_db.alembic': [
             'invenio_userprofiles = iroko.userprofiles:alembic',