-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathner_utils.py
More file actions
35 lines (29 loc) · 892 Bytes
/
ner_utils.py
File metadata and controls
35 lines (29 loc) · 892 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# ner_utils.py
from typing import List, Dict
import spacy
# Module-level cache mapping a model name to the pipeline object returned by
# spacy.load(), so each model is loaded at most once per process. It is filled
# lazily by load_model(); nothing in this module evicts entries.
_nlp_cache: Dict[str, "spacy.language.Language"] = {}
def load_model(model_name: str = "en_core_web_sm") -> "spacy.language.Language":
    """Return the spaCy pipeline for *model_name*, loading it at most once.

    The first call for a given name delegates to ``spacy.load`` and stores the
    result in the module-level ``_nlp_cache``; later calls with the same name
    return that same object without touching spaCy again.

    Args:
        model_name: Name handed to ``spacy.load``; also used as the cache key.

    Returns:
        The cached pipeline object for ``model_name``.

    Raises:
        Any exception raised by ``spacy.load`` (for example when the model is
        not installed) propagates unchanged. Nothing is cached in that case,
        so a later call will try to load the model again.
    """
    nlp = _nlp_cache.get(model_name)
    if nlp is None:
        # Cache miss: load first, store second, so a failed load leaves the
        # cache untouched.
        nlp = spacy.load(model_name)
        _nlp_cache[model_name] = nlp
    return nlp
def extract_entities(text: str, model_name: str = "en_core_web_sm") -> List[Dict]:
    """Run named-entity recognition on *text* and return the entities found.

    The pipeline is obtained through ``load_model``, so repeated calls with
    the same ``model_name`` reuse one loaded model.

    Args:
        text: The text to analyse.
        model_name: Name of the spaCy model to use; forwarded to
            ``load_model``.

    Returns:
        One dict per entity in ``doc.ents``, in the same order, with keys:

        * ``"text"``: the entity's ``text`` attribute.
        * ``"label"``: the entity's ``label_`` attribute.
        * ``"start_char"`` / ``"end_char"``: the entity's ``start_char`` and
          ``end_char`` attributes as reported by spaCy.

        The list is empty when the pipeline finds no entities.

    Raises:
        Any exception raised by ``load_model`` or by running the pipeline
        propagates unchanged.
    """
    nlp = load_model(model_name)
    doc = nlp(text)
    return [
        {
            "text": ent.text,
            "label": ent.label_,
            "start_char": ent.start_char,
            "end_char": ent.end_char,
        }
        for ent in doc.ents
    ]