diff --git a/iroko/api.py b/iroko/api.py
index 9faa76b7..28a415d8 100644
--- a/iroko/api.py
+++ b/iroko/api.py
@@ -11,6 +11,7 @@
from invenio_records.api import Record
from invenio_records_files.api import Record
from sqlalchemy.exc import NoResultFound
+import json
from iroko.pidstore import pids
from iroko.pidstore.minters import identifiers_minter, iroko_uuid_minter
@@ -68,6 +69,7 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs):
persistent_identifier, rec = resolver.resolve(str(iroko_uuid))
if rec:
print("{0}={1} found".format(pid_type, iroko_uuid))
+ print('REC',rec)
rec.update(data)
# .update(data, dbcommit=dbcommit, reindex=reindex)
return rec, 'updated'
@@ -85,16 +87,20 @@ def resolve_and_update(cls, iroko_uuid=None, data={}, **kwargs):
str(identifier[IDENTIFIERS_FIELD_VALUE])
)
print('<<<<<<<<<<<<<<<<<<')
- print('rec= ', rec)
+ print('rec= ', json.dumps(rec, indent=3))
if rec:
- print(
- "{0}={1} found".format(
- schema, str(
- identifier[IDENTIFIERS_FIELD_VALUE]
- )
- )
- )
+ # print(
+ # "{0}={1} found".format(
+ # schema, str(
+ # identifier[IDENTIFIERS_FIELD_VALUE]
+ # )
+ # )
+ # )
+ # print("===================", data)
+ print(json.dumps(data, indent=3))
rec.update(data)
+ print('========================================')
+
print('>>>>>>>>>>>>>>>>>>>>')
print('rec updated: ', rec)
return rec, 'updated'
@@ -182,6 +188,8 @@ def update(self, data=None, dbcommit=True, reindex=True, override_pids=True):
""" Update data for record.
override_pids, if True
"""
+ print(json.dumps(data, indent=3), type(data))
+
print('begin update')
self['_save_info_updated'] = str(date.today())
diff --git a/iroko/config.py b/iroko/config.py
index a7373192..a9a2a6a3 100755
--- a/iroko/config.py
+++ b/iroko/config.py
@@ -28,13 +28,17 @@
from iroko.organizations.api import OrganizationRecord
from iroko.organizations.permissions import can_edit_organization_factory
from iroko.organizations.search import OrganizationSearch
+from iroko.patents.api import PatentRecord
+from iroko.patents.search import PatentsSearch
+from iroko.patents.permissions import can_edit_patent_factory
from iroko.persons.api import PersonRecord
from iroko.persons.permissions import can_edit_person_factory
from iroko.persons.search import PersonsSearch
from iroko.pidstore import pids as pids
from iroko.pidstore.pids import (
ORGANIZATION_PID_FETCHER, ORGANIZATION_PID_MINTER,
- ORGANIZATION_PID_TYPE, PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE,
+ ORGANIZATION_PID_TYPE, PATENT_PID_FETCHER, PATENT_PID_MINTER, PATENT_PID_TYPE,
+ PERSON_PID_FETCHER, PERSON_PID_MINTER, PERSON_PID_TYPE,
)
from iroko.records.api import IrokoRecord
from iroko.records.search import IrokoRecordSearch
@@ -122,6 +126,10 @@ def _(x):
_ORG_CONVERTER = (
'pid(orgid, record_class="iroko.organizations.api.OrganizationRecord")'
)
+_PATENT_CONVERTER = (
+ 'pid(patid, record_class="iroko.patents.api.PatentRecord")'
+)
+
_PERSON_CONVERTER = (
'pid(perid, record_class="iroko.persons.api.PersonRecord")'
)
@@ -224,6 +232,37 @@ def _(x):
'delete_permission_factory_imp': can_edit_organization_factory,
'list_permission_factory_imp': allow_all
},
+ 'patid': {
+ 'pid_type': PATENT_PID_TYPE,
+ 'pid_minter': PATENT_PID_MINTER,
+ 'pid_fetcher': PATENT_PID_FETCHER,
+ 'default_endpoint_prefix': True,
+ 'record_class': PatentRecord,
+ 'search_class': PatentsSearch,
+ 'indexer_class': RecordIndexer,
+ 'record_serializers': {
+ 'application/json': ('iroko.patents.serializers'
+ ':json_v1_response'),
+ },
+ 'search_serializers': {
+ 'application/json': ('iroko.patents.serializers'
+ ':json_v1_search'),
+ },
+ 'record_loaders': {
+ 'application/json': ('iroko.patents.loaders'
+ ':json_v1'),
+ },
+ 'list_route': '/search/patents/',
+ 'item_route': '/pid/patent/<{0}:pid_value>'.format(_PATENT_CONVERTER),
+ 'default_media_type': 'application/json',
+ 'max_result_window': 10000,
+ 'error_handlers': {},
+ 'create_permission_factory_imp': can_edit_patent_factory,
+ 'read_permission_factory_imp': check_elasticsearch,
+ 'update_permission_factory_imp': can_edit_patent_factory,
+ 'delete_permission_factory_imp': can_edit_patent_factory,
+ 'list_permission_factory_imp': allow_all
+ },
'perid': {
'pid_type': PERSON_PID_TYPE,
'pid_minter': PERSON_PID_MINTER,
@@ -359,6 +398,26 @@ def _(x):
}
}
},
+ 'patents': {
+ 'filters': {
+ 'country': terms_filter('country.name'),
+ 'language': terms_filter('language')
+ },
+ 'aggs': {
+ 'country': {
+ 'terms': {
+ 'field': 'country.name',
+ 'size': 5
+ }
+ },
+ 'language': {
+ 'terms': {
+ 'field': 'language',
+ 'size': 5
+ }
+ }
+ }
+ },
'persons': {
'filters': {
'gender': terms_filter('gender'),
@@ -432,6 +491,20 @@ def _(x):
'order': 2
}
},
+ 'patents': {
+ 'bestmatch': {
+ 'title': _('Best match'),
+ 'fields': ['_score'],
+ 'default_order': 'desc',
+ 'order': 1
+ },
+ 'mostrecent': {
+ 'title': _('Most recent'),
+ 'fields': ['-_created'],
+ 'default_order': 'asc',
+ 'order': 2
+ }
+ },
'persons': {
'bestmatch': {
'title': _('Best match'),
@@ -447,6 +520,7 @@ def _(x):
}
}
}
+
"""Setup sorting options."""
RECORDS_REST_DEFAULT_SORT: {
@@ -462,6 +536,10 @@ def _(x):
'query': 'bestmatch',
'noquery': 'bestmatch',
},
+ 'patents': {
+ 'query': 'bestmatch',
+ 'noquery': 'bestmatch',
+ },
'persons': {
'query': 'bestmatch',
'noquery': 'bestmatch',
diff --git a/iroko/patents/__init__.py b/iroko/patents/__init__.py
new file mode 100755
index 00000000..36c59dd1
--- /dev/null
+++ b/iroko/patents/__init__.py
@@ -0,0 +1,16 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+
+"""Iroko patents package; exposes the ``IrokoPatents`` Flask extension."""
+
+from __future__ import absolute_import, print_function
+
+from .ext import IrokoPatents
+
+__all__ = ('IrokoPatents', )
diff --git a/iroko/patents/api.py b/iroko/patents/api.py
new file mode 100755
index 00000000..fd7af644
--- /dev/null
+++ b/iroko/patents/api.py
@@ -0,0 +1,313 @@
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+import json
+
+from elasticsearch.exceptions import NotFoundError
+from invenio_pidstore.resolver import Resolver
+from invenio_pidstore.models import PersistentIdentifier
+from invenio_indexer.api import RecordIndexer
+from invenio_pidstore.errors import PIDDeletedError, PIDDoesNotExistError
+from sqlalchemy.exc import NoResultFound
+
+
+
+from iroko.api import IrokoBaseRecord
+from iroko.organizations.api import OrganizationRecord
+from iroko.persons.api import PersonRecord
+from iroko.pidstore import pids
+from iroko.utils import remove_nulls
+from iroko.pidstore.pids import (
+ IDENTIFIERS_FIELD, IDENTIFIERS_FIELD_VALUE,
+ IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas,
+ )
+
+
+class PatentRecord (IrokoBaseRecord):
+ _schema = "patents/patent-v1.0.0.json"
+
+ @classmethod
+ def load_from_json_file(cls, file_path):
+ """bulk import of patent from a json file
+ expect spi format"""
+
+ resolver = Resolver(
+ pid_type=pids.PATENT_PID_TYPE,
+ object_type=pids.IROKO_OBJECT_TYPE,
+ getter=PatentRecord.get_record,
+ )
+ # per = PersonRecord.get_record_by_pid_value(per_pid)
+ with open(file_path) as _file:
+ patents = json.load(_file, object_hook=remove_nulls)
+ a = 0
+ for data in patents:
+ a = a + 1
+ patent = PatentRecord(data)
+ del patent['_id']
+ print(patent)
+ patentRecord = None
+ patentRecord, msg = cls.resolve_and_update(data=patent)
+ print(patentRecord)
+ if not patentRecord:
+ print("no pids found, creating patent")
+ patentRecord = cls.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE)
+ msg = 'created'
+ print('====================================', a)
+
+ @classmethod
+ def get_pat_by_pid(cls, pid_value, with_deleted=False):
+ resolver = Resolver(
+ pid_type='doi',
+ object_type=IROKO_OBJECT_TYPE,
+ getter=cls.get_record,
+ )
+ try:
+ return resolver.resolve(str(pid_value))
+ except Exception:
+ pass
+
+ # for pid_type in identifiers_schemas:
+ # try:
+ # resolver.pid_type = pid_type
+ # schemapid, pat = resolver.resolve(pid_value)
+ # pid = PersistentIdentifier.get(PATENT_PID_TYPE, pat['id'])
+ # return pid, pat
+ # except Exception as e:
+ # pass
+ return None, None
+
+ @classmethod
+ def create_or_update(cls, pat_uuid, data, **kwargs):
+ """Create or update PatentRecord."""
+
+ # assert pat_uuid
+ pat, msg = cls.resolve_and_update(pat_uuid, data)
+ # if resolve_and_update do no return, then is not existed pat, so trying to create one
+ if not pat:
+ print("no pids found, creating patent")
+ created_pat = cls.create(data, iroko_pid_type=pids.PATENT_PID_TYPE,
+ iroko_pid_value=pat_uuid)
+ pat = created_pat
+ msg = 'created'
+
+ return pat, msg
+
+
+ @classmethod
+ def update_imported(cls, pat_uuid=None, data={}):
+ resolver = Resolver(
+ pid_type=pids.RECORD_PID_TYPE,
+ object_type=IROKO_OBJECT_TYPE,
+ getter=cls.get_record,
+ )
+ if IDENTIFIERS_FIELD in data: # Si no lo encontro por el uuid, igual se intenta buscar
+ # desde cualquier otri pid
+ for schema in identifiers_schemas:
+ for identifier in data[IDENTIFIERS_FIELD]:
+ if schema == identifier[IDENTIFIERS_FIELD_TYPE]:
+ # print("identifier ------ ", identifier)
+ resolver.pid_type = schema
+ try:
+ persistent_identifier, rec = resolver.resolve(
+ str(identifier[IDENTIFIERS_FIELD_VALUE])
+ )
+ print('<<<<<<<<<<<<<<<<<<')
+ print('rec= ', json.dumps(rec, indent=3))
+ print('data= ', json.dumps(rec, indent=3))
+ if rec:
+ resolver.pid_type = pids.PATENT_PID_TYPE
+ uuid = rec["id"]
+ print(uuid)
+ try:
+ persistent_identifier, rec = resolver.resolve(str(uuid))
+ print('rec= ', json.dumps(rec, indent=3))
+ if rec:
+ print('REC',rec)
+ rec.update(data)
+ return rec, 'updated'
+ except Exception:
+ pass
+ print('========================================')
+
+ print('>>>>>>>>>>>>>>>>>>>>')
+ print('rec updated: ', rec)
+ return rec, 'updated'
+ except PIDDoesNotExistError as pidno:
+ print(
+ "PIDDoesNotExistError: {0} == {1}".format(
+ schema,
+ str(
+ identifier[
+ IDENTIFIERS_FIELD_VALUE]
+ )
+ )
+ )
+ except (PIDDeletedError, NoResultFound) as ex:
+ cls.__delete_pids_without_object(data[IDENTIFIERS_FIELD])
+ except Exception as e:
+ print('-------------------------------')
+ # print(str(e))
+ print(traceback.format_exc())
+ print('-------------------------------')
+ pass
+ return None, None
+
+ @classmethod
+ def delete(cls, pid, vendor=None, delindex=True, force=False):
+ """Delete an IrokoRecord record."""
+ record = cls.get_record_by_pid_value(pid)
+ pid.replace(pid, '')
+ result = record.delete(force=force)
+ if delindex:
+ try:
+ RecordIndexer().delete(record)
+ except NotFoundError:
+ pass
+ return result
+
+
+ def fix_patents_imported(patent):
+ if 'identifiers' in patent:
+ patent['identifiers'] = patent['identifiers']
+
+ if 'country' in patent:
+ patent['country'] = patent['country']
+ else:
+ patent['country'] = {'code': '', 'name': ''}
+
+ if 'affiliations' in patent:
+ patent['affiliations'] = patent['affiliations']
+ else:
+ patent['affiliations'] = []
+
+ if 'authors' in patent:
+ patent['authors'] = patent['authors']
+ else:
+ patent['authors'] = []
+
+ if 'language' in patent:
+ patent['language'] = patent['language']
+ else:
+ patent['language'] = ''
+
+ if 'classification' in patent:
+ patent['classification'] = patent['classification']
+ else:
+ patent['classification'] = ''
+
+ if 'link' in patent:
+ patent['link'] = patent['link']
+ else:
+ patent['link'] = ''
+
+ if 'summary' in patent:
+ patent['summary'] = patent['summary']
+ else:
+ patent['summary'] = ''
+
+ return patent
+
+ def fix_gp_imported(patent):
+ if 'id' in patent:
+ identifiers = []
+ identifiers.append({
+ 'idtype': 'doi',
+ 'value': patent['id']
+ })
+ patent['identifiers'] = identifiers
+ del patent['id']
+
+ if 'assignee' in patent:
+ affiliations = []
+ for affiliation in patent['assignee']:
+ affiliations.append({
+ 'identifiers': [],
+ 'name': affiliation
+ })
+ patent['affiliations'] = affiliations
+ del patent['assignee']
+
+ else :
+ patent['affiliations'] = []
+
+ if 'author' in patent and len(patent['author']) > 0:
+ authors = []
+ for author in patent['author']:
+ authors.append({
+ 'identifiers': [],
+ 'name': author
+ })
+ patent['authors'] = authors
+ del patent['author']
+
+ else :
+ patent['authors'] = []
+
+ patent['language'] = ''
+ patent['country'] = {'code': '', 'name': ''}
+ patent['classification'] = ''
+ del patent['']
+
+ if 'filing/creation date' in patent:
+ patent['creation_date'] = patent['filing/creation date']
+ del patent['filing/creation date']
+
+ if 'grant date' in patent:
+ patent['grant_date'] = patent['grant date']
+ del patent['grant date']
+
+ if 'priority date' in patent:
+ del patent['priority date']
+
+ if 'publication date' in patent:
+ patent['publication_date'] = patent['publication date']
+ del patent['publication date']
+
+ if 'result link' in patent:
+ patent['link'] = patent['result link']
+ del patent['result link']
+
+ return patent
+
+
+
+
+def fixture_spi_fields(person: PersonRecord, org: OrganizationRecord):
+ """hard code fixtures of spi data, coming from human resources of cuban institutions """
+ country_code = 'cu'
+ country = 'Cuba'
+ if 'addresses' in org and len(org['addresses']) > 0:
+ country_code = org['addresses'][0]['country_code']
+ country = org['addresses'][0]['country']
+ person['country'] = {'code': country_code, 'name': country}
+
+ if 'institutional_email' in person and len(person['institutional_email']) > 0:
+ person.add_email_address(person['institutional_email'])
+ if 'emails' in person:
+ for ma in person['emails']:
+ person.add_email_address(person['institutional_email'])
+ if 'lastName' in person:
+ person['last_name'] = person['lastName']
+
+ person.pop('lastName')
+ person.pop('institutional_email')
+ person.pop('emails')
+
+ new_identifiers = []
+ for identifier in person[pids.IDENTIFIERS_FIELD]:
+ if identifier['idtype'] == 'noCi':
+ new_identifiers.append({
+ 'idtype': 'dni',
+ 'value': 'dni:' + country_code + '.' + identifier['idvalue'],
+ })
+ elif identifier['idtype'] == 'idExpediente':
+ new_identifiers.append({
+ 'idtype': 'hrid',
+ 'value': 'hrid:' + str(org.id) + '.' + identifier['idvalue'],
+ })
+ else:
+ new_identifiers.append(identifier)
+ person[pids.IDENTIFIERS_FIELD] = new_identifiers
+ return person
diff --git a/iroko/patents/cli.py b/iroko/patents/cli.py
new file mode 100644
index 00000000..0d8862ce
--- /dev/null
+++ b/iroko/patents/cli.py
@@ -0,0 +1,27 @@
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+import os
+
+import click
+from flask import current_app
+from flask.cli import with_appcontext
+
+from iroko.patents.api import PatentRecord
+
+
+# Root click group for the patents commands; subcommands attach to it
+# through ``@patents.command()``.
+@click.group()
+def patents():
+ """Command related to patents iroko data."""
+
+
+@patents.command()
+@click.argument('patid')
+@with_appcontext
+def import_from_file(patid):
+ """Load from specific file en data/patents/patents.json"""
+
+ datadir = current_app.config['IROKO_DATA_DIRECTORY']
+ file_path = os.path.join(datadir, 'patents', 'patents.json')
+ PatentRecord.load_from_json_file(file_path, patid)
diff --git a/iroko/patents/ext.py b/iroko/patents/ext.py
new file mode 100644
index 00000000..5a63e142
--- /dev/null
+++ b/iroko/patents/ext.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+
+"""Flask extension for Iroko Patents."""
+
+from __future__ import absolute_import, print_function
+
+from iroko.patents.cli import patents
+
+
+class IrokoPatents(object):
+ """Iroko extension."""
+
+ def __init__(self, app=None):
+ """Extension initialization."""
+ if app:
+ self.init_app(app)
+
+ def init_app(self, app):
+ """Flask application initialization."""
+ app.cli.add_command(patents)
+ # self.init_config(app)
+ app.extensions['iroko-patents'] = self
diff --git a/iroko/patents/fixtures.py b/iroko/patents/fixtures.py
new file mode 100644
index 00000000..04ffd912
--- /dev/null
+++ b/iroko/patents/fixtures.py
@@ -0,0 +1,103 @@
+import datetime
+import os
+from typing import List
+from unicodedata import normalize
+
+from pandas import DataFrame, read_csv
+
+from iroko.records.api import IrokoRecord
+from iroko.records.search import IrokoRecordSearch
+
+
+def _is_cuban_affiliation(affiliation: str):
+ fix_words = ['cuba', 'pinar del rio', 'artemisa'
+ , 'mayabeque', 'matanzas', 'habana'
+ , 'cienfuegos', 'villa clara', 'santa clara'
+ , 'santi spiritus', 'ciego de avila'
+ , 'camaguey', 'las tunas', 'bayamo', 'holguin'
+ , 'santiago de cuba', 'guantanamo']
+ af = normalize('NFC', affiliation.lower())
+ for word in fix_words:
+ if word in af:
+ return True
+ return False
+
+def _is_university_affiliation(affiliation: str):
+ fix_words = ['universidad', 'university']
+ af = normalize('NFC', affiliation.lower())
+ for word in fix_words:
+ if word in af:
+ return True
+ return False
+
+
+def _creator_is_cuban(creator):
+ if 'affiliations' in creator:
+ for aff in creator['affiliations']:
+ if _is_cuban_affiliation(aff):
+ return True
+ return False
+
+
+def _creator_is_author(creator):
+ if 'roles' in creator:
+ for role in creator['roles']:
+ if role == 'Author':
+ return True
+ return False
+
+
+def get_cuban_authors_from_record(rec: IrokoRecord):
+ authors: List[dict] = []
+ if 'creators' in rec:
+ for creator in rec['creators']:
+ if _creator_is_author(creator) and _creator_is_cuban(creator):
+ authors.append(creator)
+ return authors
+
+
+def get_all_cubans_authors_from_records():
+ """Scan every record of ``IrokoRecordSearch`` and collect its Cuban authors.
+
+ Returns a ``(cubans, universities)`` pair of dicts keyed by author name:
+ ``cubans`` keeps the first creator seen under each name, ``universities``
+ the authors having an affiliation accepted by ``_is_university_affiliation``.
+ """
+ search = IrokoRecordSearch()
+ cubans = dict()
+ universities = dict()
+ for hit in search.scan():
+ # Load the full record for the hit; the search hit only supplies the id.
+ record = IrokoRecord.get_record_by_pid_value(hit.id)
+ authors = get_cuban_authors_from_record(record)
+ for aut in authors:
+ if 'name' in aut and aut['name'] not in cubans:
+ cubans[aut['name']] = aut
+ for aff in aut['affiliations']:
+ if _is_university_affiliation(aff):
+ universities[aut['name']] = aut
+ return cubans, universities
+
+def _tmp_func():
+ """Debug helper: track the smallest ``publication_date`` among all records.
+
+ Dates are compared as strings, starting from '2022-12-31'; the running
+ minimum and the record are printed. Nothing is returned.
+ """
+ search = IrokoRecordSearch()
+ last:str = '2022-12-31'
+ # NOTE(review): ``universities`` is never read in this function.
+ universities = dict()
+ for hit in search.scan():
+ record = IrokoRecord.get_record_by_pid_value(hit.id)
+ cur = record['publication_date']
+ if last > cur:
+ last = cur
+ print('---------------------')
+ print('---------------------')
+ print(last)
+ print(record)
+ print('---------------------')
+ print('---------------------')
+#Helpers for file uploads
+def allowed_file(filename):
+ ALLOWED_EXTENSIONS = {'csv', 'json'}
+ return '.' in filename and \
+ filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
+
+def get_ext(filename):
+ return filename.rsplit('.', 1)[1].lower()
+
+def csv_to_json(file):
+ filename=datetime.datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
+ file.save(os.path.join('./data',filename+'.'+get_ext(file.filename)))
+ df= read_csv(os.path.join('./data')+'/'+filename+'.'+get_ext(file.filename))
+ DataFrame.to_json(df,path_or_buf=os.path.join('./data',filename+'.json'),orient='records')
+ return os.path.join('./data',filename+'.json')
diff --git a/iroko/patents/importaciones/google_patents/gp.py b/iroko/patents/importaciones/google_patents/gp.py
new file mode 100644
index 00000000..51631ee1
--- /dev/null
+++ b/iroko/patents/importaciones/google_patents/gp.py
@@ -0,0 +1,7 @@
+import requests
+import wget
+from time import sleep
+from requests_html import HTMLSession
+import bs4 as bs
+
+
diff --git a/iroko/patents/importaciones/ocpi.py b/iroko/patents/importaciones/ocpi.py
new file mode 100644
index 00000000..b26cb968
--- /dev/null
+++ b/iroko/patents/importaciones/ocpi.py
@@ -0,0 +1,80 @@
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+#
+
+import json
+import requests
+import bs4 as bs
+from lxml import html
+
+# Start URL handed to ``pagination`` at the bottom of this module.
+website = 'https://wiposearch.ocpi.cu/wopublish-search/public/patents'
+
+# Module-level patent template; ``getData`` overwrites its fields with the
+# values read from the results table.
+patent = {
+ "identifiers": "",
+ "title": "",
+ "authors": [],
+ "affiliations": [],
+ "country": "",
+ "language": "",
+ "creation_date": "",
+ "grant_date": "",
+ "publication_date": "",
+ "legal_status": "",
+}
+
+def getData(url):
+ """Download ``url``, read the patents table and print the data as JSON.
+
+ The fields of the module-level ``patent`` dict are filled from fixed column
+ positions of the table rows; country and language are constants.
+
+ :param url: address of a results page.
+ :returns: the parsed ``BeautifulSoup`` page.
+ """
+ resultado = requests.get(url)
+ content = resultado.text
+ soup = bs.BeautifulSoup(content, 'html.parser')
+ # Rows of the table whose class is exactly 'table table-view COLUMN'.
+ rows = soup.find('table', {'class': 'table table-view COLUMN'}).find('tbody').find_all('tr')
+ for row in rows:
+ patent["identifiers"] = row.find_all('td')[2].get_text().rstrip()
+ patent["title"] = row.find_all('td')[1].get_text().rstrip()
+ patent["authors"] = row.find_all('td')[9].get_text().rstrip()
+ patent["affiliations"] = row.find_all('td')[8].get_text().rstrip()
+ patent["country"] = "Cuba"
+ patent["language"] = "spanish"
+ patent["creation_date"] = row.find_all('td')[3].get_text().rstrip()
+ patent["grant_date"] = row.find_all('td')[5].get_text().rstrip()
+ patent["publication_date"] = row.find_all('td')[6].get_text().rstrip()
+ patent["legal_status"] = row.find_all('td')[11].get_text().rstrip()
+ json_patent = json.dumps(patent)
+ print(json_patent)
+ return soup
+
+def nextPage(soup):
+ a = soup.find(attrs= {'id': 'id14'})
+ url = a['href']
+ hfb = url.find(';')
+ jh = url.find('?')
+ cadena = url[hfb:jh]
+ k = url.replace(cadena, '')
+ return k
+
+def pagination(url):
+ haySiguiente = True
+ while(haySiguiente):
+ try:
+ soup = getData(url)
+ url_siguiente = nextPage(soup)
+ url = url_siguiente
+ except:
+ haySiguiente = False
+
+ return 'ok'
+
+print(pagination(website))
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/iroko/patents/jsonschemas/__init__.py b/iroko/patents/jsonschemas/__init__.py
new file mode 100644
index 00000000..a1a2aa68
--- /dev/null
+++ b/iroko/patents/jsonschemas/__init__.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+
+"""JSON schemas.
+
+JSON schemas are used to define the structure of the record data of your
+instance.
+
+The documentation of
+`Invenio-JSONSchemas <https://invenio-jsonschemas.readthedocs.io/>`_
+describes their usage and configuration options.
+"""
diff --git a/iroko/patents/jsonschemas/patents/patent-v1.0.0.json b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json
new file mode 100644
index 00000000..67c7e510
--- /dev/null
+++ b/iroko/patents/jsonschemas/patents/patent-v1.0.0.json
@@ -0,0 +1,176 @@
+{
+ "$schema": "http://json-schema.org/draft-04/schema#",
+ "id": "http://localhost/schemas/patents/patent-v1.0.0.json",
+ "title": "Patent Schema",
+ "type": "object",
+ "additionalProperties": true,
+ "required": [
+ "id",
+ "identifiers"
+ ],
+ "properties": {
+ "id": {
+ "type": "string",
+ "description": "Iroko UUID, pid_type = patid"
+ },
+ "identifiers": {
+ "type": "array",
+ "description": "Patent Identifiers",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "idtype": {
+ "description": "identifier type",
+ "type": "string"
+ },
+ "value": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "title": {
+ "type": "string",
+ "description": "The title of the patent."
+ },
+ "authors": {
+ "type": "array",
+ "description": "A list with the inventors of the patent",
+ "minItems": 0,
+ "items":{
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "The name of the author",
+ "type": "string"
+ },
+ "identifiers": {
+ "type": "array",
+ "description": "Person Identifiers",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "idtype": {
+ "description": "identifier type",
+ "type": "string"
+ },
+ "value": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "affiliations": {
+ "type": "array",
+ "description": "A list with the affiliations of the patent",
+ "minItems": 0,
+ "items":{
+ "type": "object",
+ "properties": {
+ "name": {
+ "description": "The name of the affiliation",
+ "type": "string"
+ },
+ "identifiers": {
+ "type": "array",
+ "description": "Affiliation Identifiers",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "idtype": {
+ "description": "identifier type",
+ "type": "string"
+ },
+ "value": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ }
+ }
+ },
+ "summary": {
+ "type": "string",
+ "description": "A summary of the patent"
+ },
+ "classification": {
+ "type": "string",
+ "description": "The classification of the patent"
+ },
+ "claims": {
+ "type": "string",
+ "description": "An url to the file with the claims of the patent which define in technical terms why protection for the invention is requested"
+ },
+ "prior_art": {
+ "type": "string",
+ "description": "An url to the file with the background and description of the problem"
+ },
+ "drawing": {
+ "type": "string",
+ "description": "An url with a drawing with the design of what you want to patent "
+ },
+ "countries": {
+ "type": "array",
+ "description": "A list of the countries where the inventor wants to patent the product",
+ "items": {
+ "type": "string"
+ }
+ },
+ "country": {
+ "type": "object",
+ "description": "The country where originally the patent was presented",
+ "properties": {
+ "code": {
+ "type": "string",
+ "description": "The ISO 3166-1 alpha-2 code of the country"
+ },
+ "name": {
+ "type": "string",
+ "description": "The name of the country"
+ }
+ }
+ },
+ "language": {
+ "type": "string",
+ "description": "The language of the patent"
+ },
+ "key_words": {
+ "type": "array",
+ "description": "A list of key words of the patent",
+ "items": {
+ "type": "string"
+ }
+ },
+ "creation_date": {
+ "type": "string",
+ "description": "The date the patent was created",
+ "format": "date-time"
+ },
+ "grant_date": {
+ "type": "string",
+ "description": "The date the patent was granted",
+ "format": "date-time"
+ },
+ "publication_date": {
+ "type": "string",
+ "description": "The date the patent was published",
+ "format": "date-time"
+ },
+ "link": {
+ "type": "string",
+ "description": "The link of the patent in Google patents",
+ "format": "uri"
+ },
+ "legal_status": {
+ "type": "string",
+ "description": "The legal status of the patent"
+ }
+ }
+ }
diff --git a/iroko/patents/loaders/__init__.py b/iroko/patents/loaders/__init__.py
new file mode 100644
index 00000000..4611d81e
--- /dev/null
+++ b/iroko/patents/loaders/__init__.py
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Loaders.
+
+This file contains sample loaders that can be used to deserialize input data in
+an application level data structure. The marshmallow_loader() method can be
+parameterized with different schemas for the record metadata. In the provided
+json_v1 instance, it uses the PatentRecordSchemaV1, defining the
+PersistentIdentifier field.
+"""
+
+from __future__ import absolute_import, print_function
+
+from invenio_records_rest.loaders.marshmallow import marshmallow_loader
+
+from iroko.patents.marshmallow import PatentRecordSchemaV1
+
+#: JSON loader using Marshmallow for data validation.
+json_v1 = marshmallow_loader(PatentRecordSchemaV1)
+
+__all__ = (
+ 'json_v1',
+)
diff --git a/iroko/patents/mappings/__init__.py b/iroko/patents/mappings/__init__.py
new file mode 100644
index 00000000..b94e34a2
--- /dev/null
+++ b/iroko/patents/mappings/__init__.py
@@ -0,0 +1,21 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Mappings.
+
+Mappings define how patents and their fields will be indexed in Elasticsearch.
+The provided patent-v1.0.0.json file is an example of how to index patents
+in Elasticsearch. You need to provide one mapping per major version of
+Elasticsearch you want to support.
+"""
+
+from __future__ import absolute_import, print_function
diff --git a/iroko/patents/mappings/v6/__init__.py b/iroko/patents/mappings/v6/__init__.py
new file mode 100644
index 00000000..c4d43bd7
--- /dev/null
+++ b/iroko/patents/mappings/v6/__init__.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Mappings for Elasticsearch 6.x."""
+
+from __future__ import absolute_import, print_function
diff --git a/iroko/patents/mappings/v6/patents/patent-v1.0.0.json b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json
new file mode 100644
index 00000000..6a3ac3d0
--- /dev/null
+++ b/iroko/patents/mappings/v6/patents/patent-v1.0.0.json
@@ -0,0 +1,125 @@
+{
+ "mappings":{
+ "patent-v1.0.0": {
+ "date_detection": false,
+ "numeric_detection": false,
+ "properties": {
+ "$schema": {
+ "type": "text",
+ "index": false
+ },
+ "id": {
+ "type": "keyword"
+ },
+ "identifiers": {
+ "type": "object",
+ "properties": {
+ "idtype": {
+ "type": "keyword"
+ },
+ "value": {
+ "type": "keyword"
+ }
+ }
+ },
+ "title": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "authors": {
+ "type": "object",
+ "properties":{
+ "identifiers": {
+ "type": "object",
+ "properties": {
+ "idtype": {
+ "type": "keyword"
+ },
+ "value": {
+ "type": "keyword"
+ }
+ }
+ },
+ "name": {
+ "type": "keyword"
+ }
+ }
+ },
+ "affiliations": {
+ "type": "object",
+ "properties":{
+ "identifiers": {
+ "type": "object",
+ "properties": {
+ "idtype": {
+ "type": "keyword"
+ },
+ "value": {
+ "type": "keyword"
+ }
+ }
+ },
+ "name": {
+ "type": "keyword"
+ }
+ }
+ },
+ "summary": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "classification": {
+ "type": "text",
+ "analyzer": "spanish"
+ },
+ "claims": {
+ "type": "keyword"
+ },
+ "prior_art": {
+ "type": "keyword"
+ },
+ "drawing": {
+ "type": "keyword"
+ },
+ "countries": {
+ "type": "keyword"
+ },
+ "country": {
+ "type": "object",
+ "properties": {
+ "code": {
+ "type": "keyword"
+ },
+ "name": {
+ "type": "keyword"
+ }
+ }
+ },
+ "language": {
+ "type": "keyword"
+ },
+ "key_words": {
+ "type": "keyword"
+ },
+ "creation_date": {
+ "type": "date",
+ "format": "date"
+ },
+ "grant_date": {
+ "type": "date",
+ "format": "date"
+ },
+ "publication_date": {
+ "type": "date",
+ "format": "date"
+ },
+ "link": {
+ "type": "keyword"
+ },
+ "legal_status": {
+ "type": "keyword"
+ }
+ }
+ }
+ }
+}
diff --git a/iroko/patents/marshmallow/__init__.py b/iroko/patents/marshmallow/__init__.py
new file mode 100644
index 00000000..72fa19d5
--- /dev/null
+++ b/iroko/patents/marshmallow/__init__.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Schemas for marshmallow."""
+
+from __future__ import absolute_import, print_function
+
+from .json import PatentRecordSchemaV1
+
+__all__ = (
+'PatentRecordSchemaV1')
diff --git a/iroko/patents/marshmallow/json.py b/iroko/patents/marshmallow/json.py
new file mode 100644
index 00000000..c49f0c81
--- /dev/null
+++ b/iroko/patents/marshmallow/json.py
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""JSON Schemas."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_jsonschemas import current_jsonschemas
+from invenio_records_rest.schemas import Nested, StrictKeysMixin
+from invenio_records_rest.schemas.fields import (
+ DateString, GenFunction,
+ PersistentIdentifier, SanitizedUnicode,
+ )
+from marshmallow import INCLUDE, fields, missing, validate
+
+# Length validator with a minimum of 0, i.e. empty values are accepted.
+allow_empty = validate.Length(min=0)
+
+
+def bucket_from_context(_, context):
+ """Get the record's bucket from context."""
+ record = (context or {}).get('record', {})
+ return record.get('_bucket', missing)
+
+
+def files_from_context(_, context):
+ """Get the record's files from context."""
+ record = (context or {}).get('record', {})
+ return record.get('_files', missing)
+
+
+def schema_from_context(_, context):
+ """Get the record's schema from context."""
+ record = (context or {}).get('record', {})
+ return record.get(
+ "_schema",
+ current_jsonschemas.path_to_url(PatentRecord._schema)
+ )
+
+
+class IdentifierSchemaV1(StrictKeysMixin):
+ """Schema for a single identifier entry: an ``idtype``/``value`` pair.
+
+ Both fields are sanitized unicode strings and neither is required.
+ """
+
+ idtype = SanitizedUnicode()
+ value = SanitizedUnicode()
+
+
+class CountrySchemaV1(StrictKeysMixin):
+ """Schema for a country entry made of an optional ``name`` and ``code``."""
+ name = SanitizedUnicode()
+ code = SanitizedUnicode()
+
+
+class AffiliationsSchemaV1(StrictKeysMixin):
+ """Schema for an affiliation: ``id``, required ``identifiers`` list, ``name``."""
+ id = SanitizedUnicode()
+ # List of IdentifierSchemaV1 entries; the only required field here.
+ identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+ name = SanitizedUnicode()
+
+class PersonSchemaV1(StrictKeysMixin):
+ """Schema for a person: ``id``, required ``identifiers`` list, ``name``."""
+ id = SanitizedUnicode()
+ # List of IdentifierSchemaV1 entries; the only required field here.
+ identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+ name = SanitizedUnicode()
+
+
+class PatentMetadataSchemaV1(StrictKeysMixin):
+ """Schema for the record metadata."""
+
+ id = PersistentIdentifier()
+ identifiers = Nested(IdentifierSchemaV1, many=True, required=True)
+ title = SanitizedUnicode(required=True, validate=validate.Length(min=3))
+ authors = Nested(PersonSchemaV1, many=True)
+ affiliations = Nested(AffiliationsSchemaV1, many=True)
+ summary = SanitizedUnicode()
+ classification = SanitizedUnicode()
+ claims = SanitizedUnicode()
+ prior_art = SanitizedUnicode()
+ drawing = SanitizedUnicode()
+ countries = fields.List(SanitizedUnicode(), many=True)
+ country = Nested(CountrySchemaV1, many=False)
+ language = SanitizedUnicode()
+ key_words = fields.List(SanitizedUnicode(), many=True)
+ creation_date = DateString()
+ grant_date = DateString()
+ publication_date = DateString()
+ link = SanitizedUnicode()
+ legal_status = SanitizedUnicode()
+ _schema = GenFunction(
+ attribute="$schema",
+ data_key="$schema",
+ deserialize=schema_from_context, # to be added only when loading
+ )
+
+
+class PatentRecordSchemaV1(StrictKeysMixin):
+ """Record schema: the patent metadata plus record-level fields.
+
+ ``created``, ``revision``, ``updated`` and ``links`` are dump-only;
+ ``files`` is taken from the context record both when dumping and loading.
+ """
+
+ metadata = fields.Nested(PatentMetadataSchemaV1)
+ created = fields.Str(dump_only=True)
+ revision = fields.Integer(dump_only=True)
+ updated = fields.Str(dump_only=True)
+ links = fields.Dict(dump_only=True)
+ id = PersistentIdentifier()
+ files = GenFunction(
+ serialize=files_from_context, deserialize=files_from_context)
+
+# Shared single-object metadata schema instance; ``unknown=INCLUDE`` is passed
+# so that keys not declared on the schema are handled with the INCLUDE policy.
+patentMetadataSchema = PatentMetadataSchemaV1(many=False, unknown=INCLUDE)
diff --git a/iroko/patents/permissions.py b/iroko/patents/permissions.py
new file mode 100644
index 00000000..112e6442
--- /dev/null
+++ b/iroko/patents/permissions.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Permissions for Iroko."""
+from flask_login import current_user
+from flask_principal import RoleNeed
+from invenio_access import Permission
+
+# Permissions granted by holding the 'curator' / 'admin' role respectively.
+curator_permission = Permission(RoleNeed('curator'))
+admin_permission = Permission(RoleNeed('admin'))
+
+def can_edit_patent_factory(record, *args, **kwargs):
+ """Checks if logged user can update or delete patent items.
+ """
+ def can(self):
+ if current_user.is_authenticated and curator_permission.can():
+ return True
+ return False
+ return type('Check', (), {'can': can})()
diff --git a/iroko/patents/register/marshmallow.py b/iroko/patents/register/marshmallow.py
new file mode 100644
index 00000000..19db3149
--- /dev/null
+++ b/iroko/patents/register/marshmallow.py
@@ -0,0 +1,25 @@
+from marshmallow import Schema, fields, post_load
+
+from iroko.evaluations.models import EvaluationState
+
+
+class RegisterSchema(Schema):
+
+ id = fields.Int()
+ userEmail = fields.Str(required=False, allow_none=True)
+ date = fields.DateTime()
+ patents = fields.Int()
+
+ @post_load
+ def register_load(self, item, **kwargs):
+ item['userEmail'] = item['userEmail'] if 'userEmail' in item else ''
+ item['date'] = item['date'] if 'date' in item else ''
+ item['patents'] = item['patents'] if 'patents' in item else ''
+ return item
+
+
+# Ready-made schema instances: one for lists of registers (restricted to the
+# four listed fields) and one for a single register.
+register_schema_many = RegisterSchema(
+ many=True, only=(
+ 'id', 'userEmail', 'date', 'patents')
+ )
+register_schema = RegisterSchema(many=False)
diff --git a/iroko/patents/register/model.py b/iroko/patents/register/model.py
new file mode 100644
index 00000000..bcb24f71
--- /dev/null
+++ b/iroko/patents/register/model.py
@@ -0,0 +1,15 @@
+from invenio_accounts.models import User
+from invenio_db import db
+from sqlalchemy_utils.types import JSONType, UUIDType
+
+class Register(db.Model):
+ """Database model stored in the ``iroko_register`` table."""
+
+ __tablename__ = 'iroko_register'
+
+ id = db.Column(db.Integer, primary_key=True)
+ userEmail = db.Column(db.String)
+ date = db.Column(db.DateTime, nullable=False)
+ patents = db.Column(db.Integer)
+
+ # Instance of the register, kept as a JSON document.
+ data = db.Column(JSONType)
diff --git a/iroko/patents/rest.py b/iroko/patents/rest.py
new file mode 100644
index 00000000..76d45e4b
--- /dev/null
+++ b/iroko/patents/rest.py
@@ -0,0 +1,232 @@
+
+
+
+from __future__ import absolute_import, print_function
+
+from datetime import datetime, date
+import json
+import os
+
+from flask import Blueprint, flash, jsonify, make_response, request
+from elasticsearch.exceptions import NotFoundError
+from invenio_pidstore.resolver import Resolver
+from invenio_pidstore.models import PersistentIdentifier
+from invenio_indexer.api import RecordIndexer
+from flask_login import current_user
+from invenio_oauth2server import require_api_auth
+from invenio_db import db
+from iroko.utils import remove_nulls
+from flask_principal import RoleNeed
+from invenio_access import Permission
+
+from iroko.api import IrokoBaseRecord
+from iroko.patents.register.model import Register
+from iroko.patents.register.marshmallow import register_schema, register_schema_many
+from iroko.patents.api import PatentRecord
+from iroko.patents.fixtures import allowed_file, csv_to_json, get_ext
+from iroko.patents.serializers import json_v1_response
+from iroko.pidstore import pids
+from iroko.utils import IrokoResponseStatus, iroko_json_response
+from iroko.pidstore.pids import (
+ IDENTIFIERS_FIELD_TYPE, IROKO_OBJECT_TYPE, PATENT_PID_TYPE, identifiers_schemas,
+ )
+
+# Blueprint holding every route defined in this module, mounted under /patents.
+api_blueprint = Blueprint(
+ 'iroko_api_patents',
+ __name__,
+ url_prefix='/patents'
+ )
+
+
+@api_blueprint.route('/pid', methods=['GET'])
+def get_patent_by_pid_canonical():
+ """
+ Get a source by any PID received as an argument, including UUID
+ this method gives the directed organization with that pid, even if is obsolete or redirected status
+ """
+ try:
+ _id = request.args.get('value')
+ print("**********************", _id)
+ pid, patent = PatentRecord.get_pat_by_pid(pids.PATENT_PID_TYPE, _id)
+ if not pid or not patent:
+ raise Exception('')
+
+ return json_v1_response(pid, patent)
+
+ except Exception as e:
+ return jsonify({
+ 'ERROR': 'no pid found'.format(_id)
+ })
+
+
+@api_blueprint.route('/import', methods=['POST'])
+def upload_file():
+ try:
+ if not request.is_json:
+ raise Exception("No JSON data provided")
+ input_data = request.json
+ print('=======================', input_data)
+ for data in input_data:
+ if 'assignee' in data:
+ patent = PatentRecord.fix_gp_imported(data)
+ else:
+ patent = PatentRecord.fix_patents_imported(data)
+ patentRecord, msg = PatentRecord.resolve_and_update(data = patent)
+ print('aaaaaaaaaaa',patentRecord)
+ if not patentRecord:
+ print("no pids found, creating patent")
+ patentRecord = PatentRecord.create(patent, iroko_pid_type=pids.PATENT_PID_TYPE)
+ msg = 'created'
+
+ except Exception as e:
+ return jsonify({
+ 'ERROR HOLA': str(e),
+ })
+
+ return jsonify({
+ 'SUCCES':"Patentes creadas",
+ 'message':msg,
+ })
+
+
+@api_blueprint.route('//edit', methods=['POST'])
+def edit_patent(uuid):
+ """
+ Dado un uuid modificar los datos de una patente
+ """
+ try:
+ if not request.is_json:
+ raise Exception("No se especifican datos en formato json para la curacion")
+ input_data = request.json
+ print(input_data)
+ # org = org_json_v1.transform_record(input_data["id"], input_data)
+
+ pat, msg = PatentRecord.resolve_and_update(uuid, input_data)
+
+ if not pat:
+ raise Exception("No se encontro record de patente")
+
+ print("entra a la api de editar patentes...........................................")
+ return jsonify({
+ 'SUCCES':"Patente modificada",
+ 'message':msg,
+ 'org':pat
+ })
+ except Exception as e:
+ print(e)
+ return jsonify({
+ 'ERROR': str(e),
+ })
+
+@api_blueprint.route('/new', methods=['POST'])
+def create_patent():
+ try:
+ if not request.is_json:
+ raise Exception("No JSON data provided")
+
+ input_data = request.json
+ id = input_data['identifiers'][0]['value']
+ pid, patent = PatentRecord.get_pat_by_pid(id)
+ print('PID',pid)
+
+
+ if pid:
+ raise Exception("Patente existente")
+
+ pat= PatentRecord.create(input_data, iroko_pid_type=pids.PATENT_PID_TYPE)
+ msg = 'ok'
+
+ print('PAT',pat)
+
+ return jsonify({
+ 'SUCCES':"Patente creada",
+ 'message':msg,
+ 'pat':pat
+ })
+
+ except Exception as e:
+ return jsonify({
+ 'ERROR': str(e),
+ })
+
+@api_blueprint.route('/delete/', methods=['DELETE'])
+def delete_patent(uuid):
+
+ record = IrokoBaseRecord.get_record_by_pid_value(uuid)
+
+ if not record:
+ raise Exception("No se encontro record de patente")
+
+ result = super(IrokoBaseRecord, record).delete(force=False)
+ db.session.commit()
+ # if delindex:
+ try:
+ RecordIndexer().delete(record)
+ db.session.commit()
+ except NotFoundError:
+ pass
+
+
+ return result
+
+@api_blueprint.route('/register', methods=['GET'])
+def get_register():
+ try:
+ count = int(request.args.get('size')) if request.args.get('size') else 10
+ page = int(request.args.get('page')) if request.args.get('page') else 1
+
+ if page < 1:
+ page = 1
+ offset = count * (page - 1)
+ limit = offset + count
+
+ result = Register.query.all()
+ total = len(result)
+
+ return iroko_json_response(
+ IrokoResponseStatus.SUCCESS, \
+ 'ok', 'register', \
+ {
+ 'data': register_schema_many.dump(result[offset:limit]),
+ 'total': total
+ }
+ )
+
+ except Exception as e:
+ msg = str(e)
+ return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None)
+
+@api_blueprint.route('/register/new', methods=['POST'])
+def create_register():
+ try:
+ input_data = request.json
+ register = Register()
+ register.data = input_data
+ register.userEmail = input_data.get("userEmail")
+ register.date = input_data.get("date")
+ register.patents = input_data.get("patents")
+
+ db.session.add(register)
+ db.session.commit()
+
+ msg = "New Register Created"
+
+ except Exception as e:
+ msg = str(e)
+ return iroko_json_response(IrokoResponseStatus.ERROR, msg, None, None)
+
+ return iroko_json_response(
+ IrokoResponseStatus.SUCCESS, \
+ msg, 'register', \
+ register_schema.dump(register),
+ )
+
+@api_blueprint.route('/register/delete/', methods=['DELETE'])
+def delete_register(id):
+ register = Register.query.filter_by(id = id).delete()
+ db.session.commit()
+
+ return make_response("Eliminado", 204)
+
+
+
diff --git a/iroko/patents/search.py b/iroko/patents/search.py
new file mode 100644
index 00000000..d81931f1
--- /dev/null
+++ b/iroko/patents/search.py
@@ -0,0 +1,19 @@
+
+"""Patent search APIs."""
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+from invenio_search import RecordsSearch
+
+
+class PatentsSearch(RecordsSearch):
+ """RecordsSearch for patents."""
+
+ class Meta:
+ """Search only on patents index."""
+
+ index = "patents"
+ doc_types = None
diff --git a/iroko/patents/serializers/__init__.py b/iroko/patents/serializers/__init__.py
new file mode 100644
index 00000000..a664f724
--- /dev/null
+++ b/iroko/patents/serializers/__init__.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2022. Universidad de Pinar del Rio
+# This file is part of SCEIBA (sceiba.cu).
+# SCEIBA is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+#
+#
+# Iroko is free software; you can redistribute it and/or modify it under the
+# terms of the MIT License; see LICENSE file for more details.
+
+"""Record serializers."""
+
+from __future__ import absolute_import, print_function
+
+from invenio_records_rest.serializers.json import JSONSerializer
+from invenio_records_rest.serializers.response import (
+ record_responsify,
+ search_responsify,
+ )
+
+# Serializers
+# ===========
+#: JSON serializer definition.
+from iroko.patents.marshmallow.json import PatentRecordSchemaV1
+
+json_v1 = JSONSerializer(PatentRecordSchemaV1, replace_refs=True)
+
+# Records-REST serializers
+# ========================
+#: JSON record serializer for individual patents.
+json_v1_response = record_responsify(json_v1, 'application/json')
+#: JSON record serializer for search results.
+json_v1_search = search_responsify(json_v1, 'application/json')
+
+__all__ = (
+ 'json_v1',
+ 'json_v1_response',
+ 'json_v1_search',
+)
diff --git a/iroko/patents/utils.py b/iroko/patents/utils.py
new file mode 100644
index 00000000..61efa26e
--- /dev/null
+++ b/iroko/patents/utils.py
@@ -0,0 +1,65 @@
+
+
+from lxml import etree
+
+from iroko.records import ContributorRole
+
+
+def get_people_from_nlm(metadata: etree._Element):
+ """Collect the people described by NLM 2.3 ``contrib`` elements.
+
+ Every ``{http://dtd.nlm.nih.gov/publishing/2.3}contrib`` element found
+ anywhere below ``metadata`` becomes a dict with ``name`` and ``roles``,
+ plus ``affiliations`` and ``email`` when those child elements exist.
+ Entries sharing the same name are merged by extending the roles of the
+ first one.
+
+ Returns a ``(creators, contribs)`` pair of lists: people holding the
+ Author role, and everybody else.
+ """
+
+ xmlns = '{http://dtd.nlm.nih.gov/publishing/2.3}'
+ contribs_xml = metadata.findall('.//' + xmlns + 'contrib')
+
+ # People keyed by their assembled name, so repeated names are merged.
+ contributors = {}
+
+ for contrib in contribs_xml:
+ person = dict()
+
+ surname = contrib.find(xmlns + 'name/' + xmlns + 'surname')
+ given_names = contrib.find(xmlns + 'name/' + xmlns + 'given-names')
+ aff = contrib.find(xmlns + 'aff')
+ email = contrib.find(xmlns + 'email')
+ if given_names is None and surname is None:
+ # FIXME: an entry with neither a surname nor given names is skipped
+ # as "not a person", even when it carries an email.
+ continue
+ else:
+ name = ""
+ if given_names is not None and given_names.text is not None:
+ name += given_names.text
+ if surname is not None and surname.text is not None:
+ # The separating space is added even when no given names precede it.
+ name += ' ' + surname.text
+ person['name'] = name
+ if aff is not None:
+ person['affiliations'] = []
+ person['affiliations'].append(aff.text)
+ if email is not None:
+ person['email'] = email.text
+ person['roles'] = []
+ if 'corresp' in contrib.attrib:
+ if contrib.attrib['corresp'] == "yes":
+ person['roles'].append(ContributorRole.ContactPerson.value)
+ if 'contrib-type' in contrib.attrib:
+ ctype = contrib.attrib['contrib-type']
+ if ctype == "author":
+ person['roles'].append(ContributorRole.Author.value)
+ if ctype == "editor":
+ person['roles'].append(ContributorRole.Editor.value)
+ if ctype == "jmanager":
+ person['roles'].append(ContributorRole.JournalManager.value)
+ if person['name'] in contributors.keys():
+ # Same name seen before: keep the first entry, only add the new roles.
+ contributors[person['name']]['roles'].extend(person['roles'])
+ else:
+ contributors[person['name']] = person
+ # Split the merged people by whether they hold the Author role.
+ creators = []
+ contribs = []
+ for name in contributors:
+ person = contributors[name]
+ if ContributorRole.Author.value in person['roles']:
+ creators.append(person)
+ else:
+ contribs.append(person)
+ return creators, contribs
diff --git a/iroko/pidstore/fetchers.py b/iroko/pidstore/fetchers.py
index 57dfdcc5..043b76e8 100755
--- a/iroko/pidstore/fetchers.py
+++ b/iroko/pidstore/fetchers.py
@@ -73,6 +73,13 @@ def person_uuid_fetcher(per_uuid, data):
pid_value=str(data[pids.IROKO_UUID_FIELD]),
)
+def patent_uuid_fetcher(per_uuid, data):
+ """Build the ``FetchedPID`` of a patent from its record data.
+
+ The PID value is the string form of ``data[pids.IROKO_UUID_FIELD]``;
+ the first argument is not used.
+ """
+ return FetchedPID(
+ provider=providers.PatentUUIDProvider,
+ pid_type=providers.PatentUUIDProvider.pid_type,
+ pid_value=str(data[pids.IROKO_UUID_FIELD]),
+ )
+
def identifiers_fetcher(record_uuid, data, pid_type):
assert data, "no data"
diff --git a/iroko/pidstore/minters.py b/iroko/pidstore/minters.py
index 0c3ebbd8..822acd95 100755
--- a/iroko/pidstore/minters.py
+++ b/iroko/pidstore/minters.py
@@ -74,14 +74,21 @@ def organization_uuid_minter(org_uuid, data):
return provider.pid
-def person_uuid_minter(org_uuid, data):
+def person_uuid_minter(person_uuid, data):
provider = providers.PersonUUIDProvider.create(
object_type=pids.IROKO_OBJECT_TYPE,
- object_uuid=org_uuid,
+ object_uuid=person_uuid,
data=data
)
return provider.pid
+def patent_uuid_minter(patent_uuid, data):
+ """Create a patent PID for the object ``patent_uuid`` and return it.
+
+ Delegates to ``providers.PatentUUIDProvider.create`` with the Iroko
+ object type, passing ``data`` through.
+ """
+ provider = providers.PatentUUIDProvider.create(
+ object_type=pids.IROKO_OBJECT_TYPE,
+ object_uuid=patent_uuid,
+ data=data
+ )
+ return provider.pid
def identifiers_minter(uuid, data, object_type):
prsIDs = providers.IdentifiersProvider.create_identifiers(
diff --git a/iroko/pidstore/pids.py b/iroko/pidstore/pids.py
index 6652991a..fde5eee1 100644
--- a/iroko/pidstore/pids.py
+++ b/iroko/pidstore/pids.py
@@ -35,6 +35,10 @@
PERSON_PID_MINTER = "perid"
PERSON_PID_FETCHER = "perid"
+PATENT_PID_TYPE = "patid"
+PATENT_PID_MINTER = "patid"
+PATENT_PID_FETCHER = "patid"
+
IROKO_OBJECT_TYPE = "rec"
IROKO_UUID_FIELD = "id"
@@ -42,7 +46,8 @@
'irouid',
'srcid',
'orgid',
- 'perid'
+ 'perid',
+ 'patid'
]
def get_pid_by_data(data):
diff --git a/iroko/pidstore/providers.py b/iroko/pidstore/providers.py
index 7fd1fecf..e6ccadad 100755
--- a/iroko/pidstore/providers.py
+++ b/iroko/pidstore/providers.py
@@ -265,6 +265,43 @@ def create(cls, pid_type=None, pid_value=None, object_type=None,
)
+class PatentUUIDProvider(BaseProvider):
+ """Document identifier provider."""
+
+ pid_type = pids.PATENT_PID_TYPE
+ """Type of persistent identifier."""
+
+ pid_provider = None
+ """Provider name.
+ The provider name is not recorded in the PID since the provider does not
+ provide any additional features besides creation of record ids.
+ """
+
+ default_status = PIDStatus.REGISTERED
+ """Record IDs are by default registered immediately.
+ Default: :attr:`invenio_pidstore.models.PIDStatus.REGISTERED`
+ """
+
+ object_type = pids.IROKO_OBJECT_TYPE,
+
+ @classmethod
+ def create(cls, pid_type=None, pid_value=None, object_type=None,
+ object_uuid=None, **kwargs):
+ """Create a new record identifier from the depoist PID value."""
+ pid_type = pid_type or cls.pid_type
+ pid_value = pid_value or uuid.uuid4()
+ object_type = object_type or cls.object_type
+ object_uuid = object_uuid or uuid.uuid4()
+ kwargs.setdefault('status', cls.default_status)
+ return super(PatentUUIDProvider, cls).create(
+ pid_type=pid_type,
+ pid_value=pid_value,
+ object_type=object_type,
+ object_uuid=object_uuid,
+ **kwargs
+ )
+
+
class IdentifiersProvider(BaseProvider):
default_status = PIDStatus.REGISTERED
diff --git a/pyproject.toml b/pyproject.toml
index 0c35b9c4..3f18ad3d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,6 +105,7 @@ iroko_harvester = "iroko.harvester.ext:IrokoHarvester"
invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles"
iroko_organizations = "iroko.organizations.ext:IrokoOrganizations"
iroko_persons = "iroko.persons.ext:IrokoPersons"
+iroko_patents = "iroko.patents.ext:IrokoPatents"
[tool.poetry.plugins."invenio_base.blueprints"]
iroko = "iroko.theme.views:blueprint"
@@ -130,18 +131,23 @@ iroko = "iroko.records:iroko"
invenio_userprofiles = "iroko.userprofiles:InvenioUserProfiles"
iroko_organizations = "iroko.organizations:IrokoOrganizations"
iroko_persons = "iroko.persons:IrokoPersons"
+iroko_patents = "iroko.patents:IrokoPatents"
[tool.poetry.plugins."invenio_jsonschemas.schemas"]
iroko = "iroko.records.jsonschemas"
sources = "iroko.sources.schemas"
organizations = "iroko.organizations.jsonschemas"
persons = "iroko.persons.jsonschemas"
+patents = "iroko.patents.jsonschemas"
+
[tool.poetry.plugins."invenio_search.mappings"]
records = "iroko.records.mappings"
sources = "iroko.sources.mappings"
organizations = "iroko.organizations.mappings"
persons = "iroko.persons.mappings"
+patents = "iroko.patents.mappings"
+
[tool.poetry.plugins."invenio_admin.views"]
vocabulary_admin = "iroko.vocabularies.admin:vocabularies_adminview"
@@ -162,6 +168,7 @@ iroko_sources = "iroko.sources.models"
iroko_harvester = "iroko.harvester.models"
invenio_userprofiles = "iroko.userprofiles.models"
iroko_evaluations = "iroko.evaluations.models"
+iroko_register = "iroko.patents.register.model"
[tool.poetry.plugins."invenio_base.api_blueprints"]
iroko_taxonomy = "iroko.vocabularies.rest:api_blueprint"
@@ -173,6 +180,9 @@ invenio_userprofiles = "iroko.userprofiles.rest:api_blueprint"
iroko_records = "iroko.records.rest:api_blueprint"
iroko_organizations = "iroko.organizations.rest:api_blueprint"
iroko_persons = "iroko.persons.rest:api_blueprint"
+iroko_patents = "iroko.patents.rest:api_blueprint"
+# The register endpoints are routes of this same blueprint object, so it is
+# listed once; a second entry point would register the blueprint twice.
+
iroko_evaluations = "iroko.evaluations.rest:api_blueprint"
[tool.poetry.plugins."invenio_celery.tasks"]
@@ -185,6 +195,7 @@ recoai = "iroko.pidstore.fetchers:iroko_source_oai_fetcher"
srcid = "iroko.pidstore.fetchers:iroko_source_uuid_fetcher"
orgid = "iroko.pidstore.fetchers:organization_uuid_fetcher"
perid = "iroko.pidstore.fetchers:person_uuid_fetcher"
+patid = "iroko.pidstore.fetchers:patent_uuid_fetcher"
[tool.poetry.plugins."invenio_pidstore.minters"]
irouid = "iroko.pidstore.minters:iroko_uuid_minter"
@@ -193,6 +204,7 @@ recoai = "iroko.pidstore.minters:iroko_source_oai_minter"
srcid = "iroko.pidstore.minters:iroko_source_uuid_minter"
orgid = "iroko.pidstore.minters:organization_uuid_minter"
perid = "iroko.pidstore.minters:person_uuid_minter"
+patid = "iroko.pidstore.minters:patent_uuid_minter"
[tool.poetry.plugins."invenio_db.alembic"]
invenio_userprofiles = "iroko.userprofiles:alembic"
diff --git a/run b/run
index 25506955..9415b3d4 100755
--- a/run
+++ b/run
@@ -18,7 +18,7 @@ export FLASK_ENV=development
invenio run \
--cert "$script_path"/docker/nginx/test.crt \
--key "$script_path"/docker/nginx/test.key\
- --host "10.16.64.222" \
+ # --host "10.16.64.222" \
& pid_server=$!
#trap 'kill $pid_celery $pid_server &>/dev/null' EXIT
diff --git a/setup.py b/setup.py
index 9ebb84eb..6287ea09 100644
--- a/setup.py
+++ b/setup.py
@@ -142,6 +142,8 @@
'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles',
'iroko_organizations = iroko.organizations.ext:IrokoOrganizations',
'iroko_persons = iroko.persons.ext:IrokoPersons',
+ 'iroko_patents = iroko.patents.ext:IrokoPatents',
+
],
'invenio_base.blueprints': [
'iroko = iroko.theme.views:blueprint',
@@ -168,18 +170,21 @@
'invenio_userprofiles = iroko.userprofiles:InvenioUserProfiles',
'iroko_organizations = iroko.organizations:IrokoOrganizations',
'iroko_persons = iroko.persons:IrokoPersons',
+ 'iroko_patents = iroko.patents:IrokoPatents',
],
'invenio_jsonschemas.schemas': [
'iroko = iroko.records.jsonschemas',
'sources = iroko.sources.schemas',
'organizations = iroko.organizations.jsonschemas',
'persons = iroko.persons.jsonschemas',
+ 'patents = iroko.patents.jsonschemas',
],
'invenio_search.mappings': [
'records = iroko.records.mappings',
'sources = iroko.sources.mappings',
'organizations = iroko.organizations.mappings',
'persons = iroko.persons.mappings',
+ 'patents = iroko.patents.mappings',
],
'invenio_admin.views': [
'vocabulary_admin = iroko.vocabularies.admin:vocabularies_adminview',
@@ -203,6 +208,7 @@
'iroko_harvester = iroko.harvester.models',
'invenio_userprofiles = iroko.userprofiles.models',
'iroko_evaluations = iroko.evaluations.models',
+ 'iroko_register = iroko.patents.register.model',
],
'invenio_base.api_blueprints': [
'iroko_taxonomy = iroko.vocabularies.rest:api_blueprint',
@@ -214,7 +220,9 @@
'iroko_records = iroko.records.rest:api_blueprint',
'iroko_organizations = iroko.organizations.rest:api_blueprint',
'iroko_persons = iroko.persons.rest:api_blueprint',
+ 'iroko_patents = iroko.patents.rest:api_blueprint',
'iroko_evaluations = iroko.evaluations.rest:api_blueprint',
+ 'iroko_register = iroko.patents.rest:api_blueprint',
],
'invenio_celery.tasks': [
'iroko_harvester = iroko.harvester.tasks'
@@ -226,6 +234,7 @@
'srcid = iroko.pidstore.fetchers:iroko_source_uuid_fetcher',
'orgid = iroko.pidstore.fetchers:organization_uuid_fetcher',
'perid = iroko.pidstore.fetchers:person_uuid_fetcher',
+ 'patid = iroko.pidstore.fetchers:patent_uuid_fetcher',
],
'invenio_pidstore.minters': [
'irouid = iroko.pidstore.minters:iroko_uuid_minter',
@@ -234,6 +243,7 @@
'srcid = iroko.pidstore.minters:iroko_source_uuid_minter',
'orgid = iroko.pidstore.minters:organization_uuid_minter',
'perid = iroko.pidstore.minters:person_uuid_minter',
+ 'patid = iroko.pidstore.minters:patent_uuid_minter',
],
'invenio_db.alembic': [
'invenio_userprofiles = iroko.userprofiles:alembic',