-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathweb_api.py
More file actions
200 lines (171 loc) · 8.05 KB
/
web_api.py
File metadata and controls
200 lines (171 loc) · 8.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
import json
import logging
import os
import re
import sys
from typing import Optional, List, Dict, Any

import markdown
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

from src.models import CustomOpenAI
from src.research_core import ResearchCore, save_results_to_json
# Pull settings from a local .env file into the process environment.
load_dotenv()

# Module-wide logging at INFO level, with a logger named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Append the parent of this file's directory to the import search path.
# NOTE(review): this runs after the `src.*` imports at the top of the file,
# so it cannot help resolve them -- confirm whether it is still needed.
_this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.append(os.path.dirname(_this_dir))

# The OpenAI key is mandatory: refuse to start without it.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY environment variable is not set")

app = FastAPI()
class ResearchRequest(BaseModel):
    """Request body accepted by the ``/run_research`` endpoint."""

    # The question the literature review should answer (the only required field).
    research_question: str

    # Source toggles forwarded to the literature-review run.
    use_zotero: bool = True
    use_arxiv: bool = True

    # Full-text options forwarded to the literature-review run.
    use_full_text: bool = False
    max_papers_to_download: int = 10

    # Model name handed to both the OpenAI client and the research core.
    llm_model: str = "o4-mini"

    # Optional per-source configuration passed through to the research core
    # unchanged; None means the caller supplied nothing.
    zotero_config: Optional[Dict[str, Any]] = None
    arxiv_config: Optional[Dict[str, Any]] = None

    # Optional list of Zotero collection names forwarded to the review run.
    zotero_collection_names: Optional[List[str]] = None
class ResearchResponse(BaseModel):
    """Response body returned by the ``/run_research`` endpoint."""

    # Text of the generated literature review.
    literature_review: str

    # Value of the ``file_path`` entry in the research results.
    file_path: str

    # Per-source paper summaries; the endpoint sends None instead of an
    # empty list when a source contributed no papers.
    zotero_papers: Optional[List[Dict[str, Any]]] = None
    arxiv_papers: Optional[List[Dict[str, Any]]] = None
# Sort rank given to papers that are not mentioned in the literature review.
_UNRANKED = 9999

# A standalone four-digit year (1000-2999) anywhere in a date-like string.
_YEAR_RE = re.compile(r"(?<!\d)[12]\d{3}(?!\d)")

# Leading YYMM digits of an arXiv identifier: new style "2301.01234" or the
# numeric part of old style "hep-th/9901001" (optionally "arXiv:"-prefixed).
_ARXIV_YYMM_RE = re.compile(r"(?:^|[/:])(\d{2})(\d{2})")

# Trailing version suffix of an arXiv identifier, e.g. the "v2" in "2301.01234v2".
_ARXIV_VERSION_RE = re.compile(r"v\d+$")


def _clean_title(raw_title):
    """Return a display title: 'Untitled' when missing, one trailing period dropped."""
    title = str(raw_title) if raw_title else 'Untitled'
    return title[:-1] if title.endswith('.') else title


def _title_key(text):
    """Return the lookup key used to match a title against literature-review lines."""
    return text.strip().rstrip('.').strip().lower()


def _review_order(lit_review):
    """Map each title listed on a ``[n] Title`` line of the review to its line index."""
    order = {}
    for index, line in enumerate(lit_review.split('\n')):
        stripped = line.strip()
        if stripped.startswith('[') and ']' in stripped:
            # Everything after the first closing bracket is treated as the title.
            order[_title_key(stripped.split(']', 1)[1])] = index
    return order


def _year_from_date(value):
    """Return a four-digit year string found in a date-like value, or None."""
    if value is None:
        return None
    # date/datetime objects expose the year directly.
    year_attr = getattr(value, 'year', None)
    if isinstance(year_attr, int):
        return str(year_attr)
    match = _YEAR_RE.search(str(value))
    return match.group(0) if match else None


def _year_from_arxiv_id(arxiv_id):
    """Return the submission year encoded in an arXiv identifier, or None."""
    match = _ARXIV_YYMM_RE.search(str(arxiv_id or ''))
    if not match:
        return None
    yy, mm = int(match.group(1)), int(match.group(2))
    if not 1 <= mm <= 12:
        # Not a YYMM prefix, so do not guess a year from it.
        return None
    # arXiv identifiers start in 1991; two-digit years below 91 are 20xx.
    century = 1900 if yy >= 91 else 2000
    return str(century + yy)


def _resolve_source(metadata):
    """Return 'zotero' or 'arxiv' for a document, inferring it when not declared."""
    source = str(metadata.get('source') or '').lower()
    if source in ('zotero', 'arxiv'):
        return source
    # Source missing or unrecognised: the presence of an identifier decides.
    if 'arxiv_id' in metadata or 'id' in metadata:
        return 'arxiv'
    return 'zotero'


def _extract_year(metadata, source):
    """Return the publication year as a string, or 'Unknown' when none is found."""
    if source == 'arxiv':
        candidates = (
            _year_from_arxiv_id(metadata.get('arxiv_id')),
            _year_from_date(metadata.get('published')),
        )
    else:
        explicit_year = metadata.get('year')
        candidates = (
            _year_from_date(metadata.get('date')),
            str(explicit_year) if explicit_year not in (None, '') else None,
        )
    return next((year for year in candidates if year), 'Unknown')


def _arxiv_link(metadata):
    """Return the arXiv abstract URL for a document, or '#' when no id is available."""
    raw_id = str(metadata.get('arxiv_id') or '').strip()
    if not raw_id:
        entry = str(metadata.get('id') or '').strip().rstrip('/')
        _, found, tail = entry.partition('/abs/')
        if found:
            # Keep everything after /abs/ so old-style ids retain their archive prefix.
            raw_id = tail
        elif '://' in entry:
            raw_id = entry.rsplit('/', 1)[-1]
        else:
            raw_id = entry
    # Drop only a trailing version suffix; a 'v' elsewhere in the id is kept.
    arxiv_id = _ARXIV_VERSION_RE.sub('', raw_id)
    return f"https://arxiv.org/abs/{arxiv_id}" if arxiv_id else '#'


def _collect_papers(results):
    """Build the (zotero_papers, arxiv_papers) lists from the research results.

    Each list is ordered by where the paper's title appears in the literature
    review; papers not found there keep their original relative order at the end.
    """
    order_by_title = _review_order(results.get('literature_review') or '')
    ranked = {'zotero': [], 'arxiv': []}

    for doc in results.get('documents') or []:
        # Documents may be plain dicts or objects carrying a `metadata` attribute.
        if isinstance(doc, dict):
            metadata = doc.get('metadata')
        else:
            metadata = getattr(doc, 'metadata', None)
        metadata = metadata or {}

        title = _clean_title(metadata.get('title'))
        source = _resolve_source(metadata)
        paper_info = {
            'title': title,
            'authors': metadata.get('authors', 'Unknown'),
            'year': _extract_year(metadata, source),
            'has_full_text': metadata.get('has_full_text', False),
        }
        if source == 'arxiv':
            paper_info['link'] = _arxiv_link(metadata)

        rank = order_by_title.get(_title_key(title), _UNRANKED)
        ranked[source].append((rank, paper_info))

    # list.sort is stable, so equally ranked papers keep their document order.
    for entries in ranked.values():
        entries.sort(key=lambda pair: pair[0])
    zotero_papers = [info for _, info in ranked['zotero']]
    arxiv_papers = [info for _, info in ranked['arxiv']]
    return zotero_papers, arxiv_papers


@app.post("/run_research", response_model=ResearchResponse)
async def run_research(request: ResearchRequest):
    """Run a literature review for the request and return it with paper summaries.

    Args:
        request: The research question plus source, model and download options.

    Returns:
        A ResearchResponse holding the review text, the results file path and
        the per-source paper lists (None for a source with no papers).

    Raises:
        HTTPException: Status 500 carrying the error text when any step fails.
    """
    try:
        logger.info("Received research request: %s", request.research_question)
        openai_client = CustomOpenAI(api_key=OPENAI_API_KEY, model=request.llm_model)

        logger.info("Initializing ResearchCore...")
        # Configs are passed through as given; None when the caller sent none.
        research_core = ResearchCore(
            llm_model=request.llm_model,
            output_dir='./research_output',
            zotero_config=request.zotero_config,
            arxiv_config=request.arxiv_config,
        )
        # Replace the core's LLM with the client built from the environment key.
        research_core.llm = openai_client

        logger.info("Running literature review...")
        # NOTE(review): this is a synchronous call inside an `async def` handler,
        # so it blocks the event loop while it runs. Consider a plain `def`
        # handler or a threadpool once ResearchCore's thread-safety is confirmed.
        results = research_core.run_literature_review(
            research_question=request.research_question,
            use_zotero=request.use_zotero,
            use_arxiv=request.use_arxiv,
            zotero_collection_names=request.zotero_collection_names,
            use_full_text=request.use_full_text,
            max_papers_to_download=request.max_papers_to_download,
        )

        logger.info("Saving results to JSON...")
        save_results_to_json(results)

        zotero_papers, arxiv_papers = [], []
        if 'documents' in results:
            zotero_papers, arxiv_papers = _collect_papers(results)
            logger.info(
                "Extracted %d Zotero papers and %d ArXiv papers",
                len(zotero_papers),
                len(arxiv_papers),
            )

        logger.info("Research completed successfully")
        return ResearchResponse(
            literature_review=results['literature_review'],
            file_path=results['file_path'],
            zotero_papers=zotero_papers or None,
            arxiv_papers=arxiv_papers or None,
        )
    except HTTPException:
        # Already an HTTP error: do not re-wrap it as a 500.
        raise
    except Exception as e:
        logger.exception("Error processing research request: %s", e)
        # NOTE(review): the raw exception text is returned to the client.
        raise HTTPException(status_code=500, detail=str(e)) from e
# Script entry point: serve the app with uvicorn when this file is run directly.
if __name__ == "__main__":
    import uvicorn

    # host "0.0.0.0" listens on all network interfaces, port 8000.
    uvicorn.run(app, port=8000, host="0.0.0.0")