Skip to content

Commit 1d19e6d

Browse files
committed
align notebooks
1 parent 5662adc commit 1d19e6d

3 files changed

Lines changed: 114 additions & 13 deletions

File tree

src/evaluation_instruments/instruments/epic_draft_appeal/Draft_Appeal.ipynb

Lines changed: 112 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
},
2929
{
3030
"cell_type": "code",
31-
"execution_count": null,
31+
"execution_count": 1,
3232
"id": "fb6539a1",
3333
"metadata": {},
3434
"outputs": [],
@@ -60,7 +60,7 @@
6060
},
6161
{
6262
"cell_type": "code",
63-
"execution_count": null,
63+
"execution_count": 2,
6464
"id": "edaee0f2",
6565
"metadata": {},
6666
"outputs": [],
@@ -99,10 +99,18 @@
9999
},
100100
{
101101
"cell_type": "code",
102-
"execution_count": null,
102+
"execution_count": 3,
103103
"id": "604d7a61",
104104
"metadata": {},
105-
"outputs": [],
105+
"outputs": [
106+
{
107+
"name": "stderr",
108+
"output_type": "stream",
109+
"text": [
110+
"DEBUG:evaluation:Set up with log_enabled=True and capacity 10000\n"
111+
]
112+
}
113+
],
106114
"source": [
107115
"from draft_appeal_prompt import to_prompt\n",
108116
"import evaluation_instruments as ev\n",
@@ -146,10 +154,19 @@
146154
},
147155
{
148156
"cell_type": "code",
149-
"execution_count": null,
157+
"execution_count": 4,
150158
"id": "4db685fe",
151159
"metadata": {},
152-
"outputs": [],
160+
"outputs": [
161+
{
162+
"name": "stderr",
163+
"output_type": "stream",
164+
"text": [
165+
"DEBUG:evaluation:000-Completed evaluation\n",
166+
"INFO:evaluation:Dumped raw content to None\n"
167+
]
168+
}
169+
],
153170
"source": [
154171
"output = evaluator.run_dataset(input_df, model='gpt-4o-mini')"
155172
]
@@ -164,10 +181,79 @@
164181
},
165182
{
166183
"cell_type": "code",
167-
"execution_count": null,
184+
"execution_count": 5,
168185
"id": "7b248b1e-9ef8-4add-b057-d06de4f07f39",
169186
"metadata": {},
170-
"outputs": [],
187+
"outputs": [
188+
{
189+
"data": {
190+
"text/html": [
191+
"<div>\n",
192+
"<style scoped>\n",
193+
" .dataframe tbody tr th:only-of-type {\n",
194+
" vertical-align: middle;\n",
195+
" }\n",
196+
"\n",
197+
" .dataframe tbody tr th {\n",
198+
" vertical-align: top;\n",
199+
" }\n",
200+
"\n",
201+
" .dataframe thead th {\n",
202+
" text-align: right;\n",
203+
" }\n",
204+
"</style>\n",
205+
"<table border=\"1\" class=\"dataframe\">\n",
206+
" <thead>\n",
207+
" <tr style=\"text-align: right;\">\n",
208+
" <th></th>\n",
209+
" <th>TextQuality</th>\n",
210+
" <th>MedicalTerminology</th>\n",
211+
" <th>Grammar</th>\n",
212+
" <th>TextFormat</th>\n",
213+
" <th>Tone</th>\n",
214+
" <th>References</th>\n",
215+
" <th>RelevantReferences</th>\n",
216+
" <th>MedicalNecessity</th>\n",
217+
" <th>FalseReasoning</th>\n",
218+
" <th>Opposition</th>\n",
219+
" <th>FactualAccuracy</th>\n",
220+
" </tr>\n",
221+
" </thead>\n",
222+
" <tbody>\n",
223+
" <tr>\n",
224+
" <th>000</th>\n",
225+
" <td>4</td>\n",
226+
" <td>5</td>\n",
227+
" <td>5</td>\n",
228+
" <td>4</td>\n",
229+
" <td>5</td>\n",
230+
" <td>5</td>\n",
231+
" <td>5</td>\n",
232+
" <td>5</td>\n",
233+
" <td>5</td>\n",
234+
" <td>5</td>\n",
235+
" <td>5</td>\n",
236+
" </tr>\n",
237+
" </tbody>\n",
238+
"</table>\n",
239+
"</div>"
240+
],
241+
"text/plain": [
242+
" TextQuality MedicalTerminology Grammar TextFormat Tone References \\\n",
243+
"000 4 5 5 4 5 5 \n",
244+
"\n",
245+
" RelevantReferences MedicalNecessity FalseReasoning Opposition \\\n",
246+
"000 5 5 5 5 \n",
247+
"\n",
248+
" FactualAccuracy \n",
249+
"000 5 "
250+
]
251+
},
252+
"execution_count": 5,
253+
"metadata": {},
254+
"output_type": "execute_result"
255+
}
256+
],
171257
"source": [
172258
"grades = ev.frame_from_evals(output[0])\n",
173259
"grades.xs('score', axis=1, level=1)"
@@ -178,16 +264,31 @@
178264
"execution_count": null,
179265
"id": "ec27374a",
180266
"metadata": {},
181-
"outputs": [],
267+
"outputs": [
268+
{
269+
"ename": "KeyError",
270+
"evalue": "\"['evidence'] not in index\"",
271+
"output_type": "error",
272+
"traceback": [
273+
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
274+
"\u001b[31mKeyError\u001b[39m Traceback (most recent call last)",
275+
"\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[6]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m \u001b[38;5;28;01mwith\u001b[39;00m pd.option_context(\u001b[33m'\u001b[39m\u001b[33mdisplay.max_colwidth\u001b[39m\u001b[33m'\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m):\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m display(\u001b[43mgrades\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mMedicalNecessity\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mscore\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[33;43m'\u001b[39;49m\u001b[33;43mevidence\u001b[39;49m\u001b[33;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m]\u001b[49m)\n",
276+
"\u001b[36mFile \u001b[39m\u001b[32m~/workspace/local/venv/lib/python3.12/site-packages/pandas/core/frame.py:4108\u001b[39m, in \u001b[36mDataFrame.__getitem__\u001b[39m\u001b[34m(self, key)\u001b[39m\n\u001b[32m 4106\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_iterator(key):\n\u001b[32m 4107\u001b[39m key = \u001b[38;5;28mlist\u001b[39m(key)\n\u001b[32m-> \u001b[39m\u001b[32m4108\u001b[39m indexer = \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m.\u001b[49m\u001b[43m_get_indexer_strict\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mcolumns\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m)\u001b[49m[\u001b[32m1\u001b[39m]\n\u001b[32m 4110\u001b[39m \u001b[38;5;66;03m# take() does not accept boolean indexers\u001b[39;00m\n\u001b[32m 4111\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(indexer, \u001b[33m\"\u001b[39m\u001b[33mdtype\u001b[39m\u001b[33m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m) == \u001b[38;5;28mbool\u001b[39m:\n",
277+
"\u001b[36mFile \u001b[39m\u001b[32m~/workspace/local/venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:6200\u001b[39m, in \u001b[36mIndex._get_indexer_strict\u001b[39m\u001b[34m(self, key, axis_name)\u001b[39m\n\u001b[32m 6197\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 6198\u001b[39m keyarr, indexer, new_indexer = \u001b[38;5;28mself\u001b[39m._reindex_non_unique(keyarr)\n\u001b[32m-> \u001b[39m\u001b[32m6200\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_raise_if_missing\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkeyarr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindexer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maxis_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 6202\u001b[39m keyarr = \u001b[38;5;28mself\u001b[39m.take(indexer)\n\u001b[32m 6203\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, Index):\n\u001b[32m 6204\u001b[39m \u001b[38;5;66;03m# GH 42790 - Preserve name from an Index\u001b[39;00m\n",
278+
"\u001b[36mFile \u001b[39m\u001b[32m~/workspace/local/venv/lib/python3.12/site-packages/pandas/core/indexes/base.py:6252\u001b[39m, in \u001b[36mIndex._raise_if_missing\u001b[39m\u001b[34m(self, key, indexer, axis_name)\u001b[39m\n\u001b[32m 6249\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mNone of [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m] are in the [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00maxis_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m]\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m 6251\u001b[39m not_found = \u001b[38;5;28mlist\u001b[39m(ensure_index(key)[missing_mask.nonzero()[\u001b[32m0\u001b[39m]].unique())\n\u001b[32m-> \u001b[39m\u001b[32m6252\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnot_found\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m not in index\u001b[39m\u001b[33m\"\u001b[39m)\n",
279+
"\u001b[31mKeyError\u001b[39m: \"['evidence'] not in index\""
280+
]
281+
}
282+
],
182283
"source": [
183284
"with pd.option_context('display.max_colwidth', None):\n",
184-
" display(grades['MedicalNecessity'][['score','evidence']])"
285+
" display(grades['MedicalNecessity'][['score','explanation']])"
185286
]
186287
}
187288
],
188289
"metadata": {
189290
"kernelspec": {
190-
"display_name": ".venv",
291+
"display_name": "Python 3 (ipykernel)",
191292
"language": "python",
192293
"name": "python3"
193294
},

src/evaluation_instruments/instruments/epic_summary_of_care/Summary_of_Care.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@
181181
"outputs": [],
182182
"source": [
183183
"with pd.option_context('display.max_colwidth', None):\n",
184-
" display(grades['TextQuality'][['score','evidence']])"
184+
" display(grades['TextQuality'][['score','explanation']])"
185185
]
186186
}
187187
],

src/evaluation_instruments/instruments/pdsqi_9/PDSQI_annotated.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -173,7 +173,7 @@
173173
"metadata": {},
174174
"outputs": [],
175175
"source": [
176-
"grades = pd.DataFrame.from_dict(output[0], orient='index')"
176+
"grades = ev.frame_from_evals(output[0])"
177177
]
178178
},
179179
{

0 commit comments

Comments
 (0)