From df6bde33c04acb1d02a2c77ffe8ddc43c2254ba8 Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Thu, 1 May 2025 16:55:18 +0000
Subject: [PATCH 1/7] =?UTF-8?q?=F0=9F=AA=B3providers=20dont=20return=20jso?=
 =?UTF-8?q?n?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .devcontainer/image/stagefiles/layer.sh     |  4 ++-
 src/evaluation_instruments/_evaluation.py   | 29 +++++++++++++--------
 src/evaluation_instruments/post/__init__.py | 23 +++++++++++++++-
 3 files changed, 43 insertions(+), 13 deletions(-)

diff --git a/.devcontainer/image/stagefiles/layer.sh b/.devcontainer/image/stagefiles/layer.sh
index 4024fa8..ffc8c3c 100644
--- a/.devcontainer/image/stagefiles/layer.sh
+++ b/.devcontainer/image/stagefiles/layer.sh
@@ -14,7 +14,9 @@ apt-get install --no-install-recommends --yes --quiet \
     graphviz \
     libgraphviz-dev \
     pandoc \
-    virtualenv
+    virtualenv \
+    ca-certificates
+
 
 # Make vim tiny default
 ln -sfn /usr/bin/vi /usr/bin/vim
diff --git a/src/evaluation_instruments/_evaluation.py b/src/evaluation_instruments/_evaluation.py
index 3944acd..c5ffe64 100644
--- a/src/evaluation_instruments/_evaluation.py
+++ b/src/evaluation_instruments/_evaluation.py
@@ -66,7 +66,9 @@ def __init__(
 
         self.log_enabled = log_enabled
         self._model_args = model_args or {}
-        self.capacity = TokenUsage(None, None, max_tokens)
+
+        self.tmp_dir: Optional[Path] = None
+        self.capacity: TokenUsage = TokenUsage(None, None, max_tokens)
 
         logger.debug(f"Set up with {log_enabled=} and capacity {max_tokens}")
 
@@ -142,9 +144,7 @@ def run_dataset(self, df: "pd.DataFrame", model: str = None, capacity: int = Non
             response, usage = self._post_fn(sample_ix, raw_output)
             accumulated_usage += TokenUsage(**usage)
 
-            tmp_dir = self._dump_to_temp(sample_ix=sample_ix, raw_content=raw_output)
             outputs[sample_ix] = response
-
             logger.debug(f"{sample_ix}-Completed evaluation")
 
             # abort if beyond capacity
@@ -152,7 +152,7 @@ def run_dataset(self, df: "pd.DataFrame", model: str = None, capacity: int = Non
                 logger.warning(f"Aborting run after {sample_ix}. Capacity exceeded: {accumulated_usage} > {max_usage}")
                 break
 
-        if tmp_dir is not None:
+        if self.tmp_dir is not None:
             logger.info(f"Dumped raw content to {tmp_dir}")
 
         return outputs, accumulated_usage
@@ -178,20 +178,21 @@ def _dump_to_temp(self, sample_ix, raw_content) -> Optional[Path]:
             return None
 
         datestamp = datetime.now().strftime("%Y%m%d-%Hh")  # Generate a timestamp in the format YYYYMMDD-hhmm
-        tmp_dir = Path(tempfile.gettempdir()) / "evaluation_logs" / f"{datestamp}"
-        tmp_dir.mkdir(parents=True, exist_ok=True)
+
+        if self.tmp_dir is None:
+            tmp_dir = Path(tempfile.gettempdir()) / "evaluation_logs" / f"{datestamp}"
+            tmp_dir.mkdir(parents=True, exist_ok=True)
+            self.tmp_dir = tmp_dir
 
         timestamp = datetime.now().strftime("%H%M%S")  # Generate a timestamp in the format hhmmss
-        filepath = tmp_dir / f"{sample_ix}_raw_{timestamp}.json"
+        filepath = self.tmp_dir / f"{sample_ix}_raw_{timestamp}.json"
 
         with open(filepath, "w") as f:
             # Write the raw content to the file
             if isinstance(raw_content, dict):
                 json.dump(raw_content, f)
             else:
-                f.write(raw_content)
-
-        return tmp_dir
+                f.write(str(raw_content))
 
     def post_process_default(self, sample_ix, openai_json: dict) -> tuple[dict, TokenUsage]:
         """
@@ -214,9 +215,13 @@ def post_process_default(self, sample_ix, openai_json: dict) -> tuple[dict, Toke
             as well as the usage information from response['usage'].
         """
         ix = 0  # assume N=1
+        try: #  Many providers have their own response objects, try to convert
+            openai_json = openai_json.dict()
+        except AttributeError:
+            pass
+
         try:
             raw_content = openai_json["choices"][ix]["message"]["content"]
-
             # Assume no nesting {}
             response = json.loads(raw_content[raw_content.find("{") : raw_content.find("}") + 1])  # noqa: E203
         except Exception:
@@ -224,6 +229,8 @@ def post_process_default(self, sample_ix, openai_json: dict) -> tuple[dict, Toke
             response = {}
 
         usage = openai_json.get("usage", {"completion_tokens": 0, "prompt_tokens": 0, "total_tokens": 0})
+
+        self._dump_to_temp(sample_ix=sample_ix, raw_content=openai_json)
         return response, usage
 
 
diff --git a/src/evaluation_instruments/post/__init__.py b/src/evaluation_instruments/post/__init__.py
index ca43f8e..75d1094 100644
--- a/src/evaluation_instruments/post/__init__.py
+++ b/src/evaluation_instruments/post/__init__.py
@@ -2,10 +2,31 @@
 
 
 def frame_from_evals(full_output: dict, criteria_outputs: list[str] = None) -> pd.DataFrame:
+    """
+    Convert the full output of the evaluation into a DataFrame.
+
+    Useful for when the evaluation returns multiple outputs per criteria, such as a score plus evidence.
+
+    Parameters
+    ----------
+    full_output : dict
+        The full output of the evaluation, typically a dictionary with keys as the primary keys and values as dictionaries
+        containing the criteria and their respective outputs.
+    criteria_outputs : list[str], optional
+        The names of the outputs to include in the DataFrame. If not provided, defaults to ["evidence", "score"].
+        Specify [] or a single valued list to return a DataFrame with only one level, the criteria names.
+
+    Returns
+    -------
+    pd.DataFrame
+        _description_
+    """
     if not full_output:
         return pd.DataFrame()
 
-    criteria_outputs = criteria_outputs or ["evidence", "score"]
+    criteria_outputs = criteria_outputs if criteria_outputs is not None else ["evidence", "score"]
+    if len(criteria_outputs) < 2:
+        return pd.DataFrame.from_dict(full_output, orient="index")
 
     reformatted = {}
     for pk, criteria in full_output.items():

From b806870778dda0848721dfddb7590ed4bb1df042 Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Thu, 1 May 2025 17:31:58 +0000
Subject: [PATCH 2/7] test and log

---
 changelog/5.fix.rst      |  2 ++
 tests/test_evaluation.py | 77 +++++++++++++++++++++++++++++++---------
 tests/test_post.py       | 24 +++++++++++++
 3 files changed, 87 insertions(+), 16 deletions(-)
 create mode 100644 changelog/5.fix.rst

diff --git a/changelog/5.fix.rst b/changelog/5.fix.rst
new file mode 100644
index 0000000..6a4e64c
--- /dev/null
+++ b/changelog/5.fix.rst
@@ -0,0 +1,2 @@
+Update default postproccessing to handle the objects returned by more providers.
+Update the results transform to handle single score objects.
diff --git a/tests/test_evaluation.py b/tests/test_evaluation.py
index 51f1627..6b292b0 100644
--- a/tests/test_evaluation.py
+++ b/tests/test_evaluation.py
@@ -7,24 +7,54 @@
 from evaluation_instruments._evaluation import Evaluation
 from evaluation_instruments.model import TokenUsage
 
-
-def fake_completion(prompt, **kwargs):
-    """Fake completion function to simulate OpenAI API response."""
+def example_dict():
     return {
-        "choices": [{"message": {"content": json.dumps({"result": "success"})}}],
-        "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
-    }
+            "choices": [{"message": {"content": json.dumps({"result": "success"})}}],
+            "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+        }
+
+class CompletionObj:
+    def __init__(self, response_dict):
+        self.response_dict = response_dict
 
+    def dict(self):
+        return self.response_dict
+
+    def json(self):
+        return json.dumps(self.response_dict)
+
+@pytest.fixture
+def sample_evaluation_obj():
+    """
+    Create a basic evaluation instance with mock functions
+    Returns an object with a dict method, like Openai
+    """
+    prep_fn = MagicMock(return_value="test prompt")
+    completion_fn = MagicMock(
+        return_value=CompletionObj(example_dict())
+    )
+    post_fn = MagicMock(
+        return_value=({"result": "success"}, {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15})
+    )
+
+    return Evaluation(
+        prep_fn=prep_fn,
+        completion_fn=completion_fn,
+        post_process_fn=post_fn,
+        log_enabled=False,
+        model_args={},
+        max_tokens=1000,
+    )
 
 @pytest.fixture
 def sample_evaluation():
-    """Create a basic evaluation instance with mock functions."""
+    """
+    Create a basic evaluation instance with mock functions.
+    Directly returns a dictionary
+    """
     prep_fn = MagicMock(return_value="test prompt")
     completion_fn = MagicMock(
-        return_value={
-            "choices": [{"message": {"content": '{"result": "success"}'}}],
-            "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
-        }
+        return_value=example_dict()
     )
     post_fn = MagicMock(
         return_value=({"result": "success"}, {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15})
@@ -42,10 +72,7 @@ def sample_evaluation():
 
 def static_completion(prompt, **kwargs):
     """Fake completion function to simulate OpenAI API response."""
-    return {
-        "choices": [{"message": {"content": json.dumps({"result": "success"})}}],
-        "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
-    }
+    return example_dict()
 
 
 def static_prep(sample):
@@ -115,7 +142,7 @@ def test_toggle_logging(self, initial_state, sample_evaluation):
         assert result == (not initial_state)
 
     def test_run_dataset(self, sample_evaluation):
-        """Test the run_dataset method processes samples correctly."""
+        """Test that run_object returns the expected output."""
         # Create a mock DataFrame
         df = pd.DataFrame({"id": [1, 2], "data": ["test1", "test2"]})
 
@@ -132,6 +159,24 @@ def test_run_dataset(self, sample_evaluation):
         assert all(v == {"result": "success"} for v in outputs.values())
         assert usage == TokenUsage(20, 10, 30)
 
+    def test_run_dataset(self, sample_evaluation_obj):
+        """Test that run_object returns the expected output."""
+        # Create a mock DataFrame
+        df = pd.DataFrame({"id": [1, 2], "data": ["test1", "test2"]})
+
+        # Run the dataset
+        outputs, usage = sample_evaluation_obj.run_dataset(df)
+
+        # Check that prep_fn, completion_fn, and post_fn were called for each sample
+        assert sample_evaluation_obj._prep_fn.call_count == 2
+        assert sample_evaluation_obj._completion_fn.call_count == 2
+        assert sample_evaluation_obj._post_fn.call_count == 2
+
+        # Check outputs and usage
+        assert len(outputs) == 2
+        assert all(v == {"result": "success"} for v in outputs.values())
+        assert usage == TokenUsage(20, 10, 30)
+
     def test_run_dataset_empty(self, sample_evaluation, caplog):
         """Test the run_dataset method processes samples correctly."""
 
diff --git a/tests/test_post.py b/tests/test_post.py
index b0fe789..a72994d 100644
--- a/tests/test_post.py
+++ b/tests/test_post.py
@@ -89,3 +89,27 @@ def test_frame_from_evals_single_item():
 
     # Verify exact match with expected DataFrame
     assert_frame_equal(result_df, expected_df)
+
+def test_frame_from_evals_single_score_multiple_items():
+    """Test frame_from_evals with multiple items but only single score values (no evidence)."""
+    # Sample with single score per criteria, multiple items
+    eval_output = {
+        "sample1": {
+            "criteria1": 4,
+            "criteria2": 3,
+        },
+        "sample2": {
+            "criteria1": 5,
+            "criteria2": 2,
+        },
+    }
+    expected_df = pd.DataFrame(
+        {"criteria1": [4, 5], "criteria2": [3, 2]},
+        index=["sample1", "sample2"]
+    )
+
+    # Act
+    result_df = frame_from_evals(eval_output, [])
+
+    # Verify exact match with expected DataFrame
+    assert_frame_equal(result_df, expected_df)

From 28134e4baa871c9f3e707316e18884d4c719fcac Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Thu, 1 May 2025 17:38:12 +0000
Subject: [PATCH 3/7] have completion_fn use all kwargs

---
 src/evaluation_instruments/_evaluation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/evaluation_instruments/_evaluation.py b/src/evaluation_instruments/_evaluation.py
index c5ffe64..c383ccb 100644
--- a/src/evaluation_instruments/_evaluation.py
+++ b/src/evaluation_instruments/_evaluation.py
@@ -139,7 +139,7 @@ def run_dataset(self, df: "pd.DataFrame", model: str = None, capacity: int = Non
             prompt = self.prep_fn(sample)
 
             # Delegate
-            raw_output = self.completion_fn(model, prompt, **self._model_args)
+            raw_output = self.completion_fn(model=model, messages=prompt, **self._model_args)
 
             response, usage = self._post_fn(sample_ix, raw_output)
             accumulated_usage += TokenUsage(**usage)

From 4fbe26d6597cfca82ce880df51fae8e3d13b84a7 Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Thu, 1 May 2025 17:48:20 +0000
Subject: [PATCH 4/7] bad fragment type

---
 changelog/{5.fix.rst => 7.bugfix.rst} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename changelog/{5.fix.rst => 7.bugfix.rst} (100%)

diff --git a/changelog/5.fix.rst b/changelog/7.bugfix.rst
similarity index 100%
rename from changelog/5.fix.rst
rename to changelog/7.bugfix.rst

From 41d08caaef2198161f4d7c5589aab0e23d1ba87d Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Thu, 1 May 2025 18:22:12 +0000
Subject: [PATCH 5/7] more robust handling

---
 src/evaluation_instruments/_evaluation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/evaluation_instruments/_evaluation.py b/src/evaluation_instruments/_evaluation.py
index c383ccb..6950fce 100644
--- a/src/evaluation_instruments/_evaluation.py
+++ b/src/evaluation_instruments/_evaluation.py
@@ -216,10 +216,13 @@ def post_process_default(self, sample_ix, openai_json: dict) -> tuple[dict, Toke
         """
         ix = 0  # assume N=1
         try: #  Many providers have their own response objects, try to convert
-            openai_json = openai_json.dict()
+            openai_json = openai_json.json()
         except AttributeError:
             pass
 
+        if isinstance(openai_json, str):
+            openai_json = json.loads(openai_json)
+
         try:
             raw_content = openai_json["choices"][ix]["message"]["content"]
             # Assume no nesting {}

From 7aefe6487357a552b9608beb72ec2e67be2190bb Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Fri, 2 May 2025 14:27:39 +0000
Subject: [PATCH 6/7] =?UTF-8?q?=F0=9F=93=A6automate=20a=20path=20to=20keep?=
 =?UTF-8?q?=20keys=20in=20secrets=20&=20env?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .devcontainer/devcontainer.json                        | 2 +-
 .devcontainer/docker-compose.yml                       | 3 +++
 .devcontainer/image/stagefiles/layer.sh                | 8 ++++++++
 .devcontainer/image/stagefiles/python/requirements.txt | 1 +
 .devcontainer/post-create.sh                           | 1 +
 5 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100755 .devcontainer/post-create.sh

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 5975fb6..548d5dd 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -44,7 +44,7 @@
     // Uncomment the next line if you want to keep your containers running after VS Code shuts down.
     // "shutdownAction": "none",
     // The next line to run commands after the container is created, it auto-installs precommit hooks and the library
-    // "postCreateCommand": "echo \"\\nif [ -f /home/seismo/workspace/.devcontainer/.bashrc ]; then\\n  . /home/seismo/workspace/.devcontainer/.bashrc;\\nfi\" >> $HOME/.bashrc",
+    "postCreateCommand": "/home/seismo/workspace/.devcontainer/post-create.sh",
     // Uncomment the next line to have VS Code connect as an existing non-root user in the container. See
     // https://aka.ms/vscode-remote/containers/non-root for details on adding a non-root user if none exist.
     //"remoteUser": "seismo"
diff --git a/.devcontainer/docker-compose.yml b/.devcontainer/docker-compose.yml
index 93b536b..5433bf8 100644
--- a/.devcontainer/docker-compose.yml
+++ b/.devcontainer/docker-compose.yml
@@ -11,6 +11,9 @@ services:
       # Update this to wherever you want VS Code to mount the folder of your project
       - ..:/home/seismo/workspace
 
+      # Mount certificates read-only
+      - /usr/local/share/ca-certificates:/usr/local/share/ca-certificates:ro
+
       # [From Extension] Forwards the local Docker socket to the container.
       - /var/run/docker.sock:/var/run/docker.sock
 
diff --git a/.devcontainer/image/stagefiles/layer.sh b/.devcontainer/image/stagefiles/layer.sh
index ffc8c3c..15dbeab 100644
--- a/.devcontainer/image/stagefiles/layer.sh
+++ b/.devcontainer/image/stagefiles/layer.sh
@@ -26,3 +26,11 @@ apt-get autoremove -y
 apt-get clean
 
 rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Append to seismo user's .bashrc to source secrets/.env if it exists
+echo '
+# Source secrets/.env if it exists
+if [ -f "$HOME/workspace/secrets/.env" ]; then
+    source "$HOME/workspace/secrets/.env"
+fi
+' >> /home/seismo/.bashrc
diff --git a/.devcontainer/image/stagefiles/python/requirements.txt b/.devcontainer/image/stagefiles/python/requirements.txt
index 5d077d6..5ff268d 100644
--- a/.devcontainer/image/stagefiles/python/requirements.txt
+++ b/.devcontainer/image/stagefiles/python/requirements.txt
@@ -1,4 +1,5 @@
 #seismometer
 #litellm==1.65.4
+pandas==2.2.3
 jupyterlab==4.4.1
 pre-commit==4.2.0
diff --git a/.devcontainer/post-create.sh b/.devcontainer/post-create.sh
new file mode 100755
index 0000000..8cbd569
--- /dev/null
+++ b/.devcontainer/post-create.sh
@@ -0,0 +1 @@
+pip install -e .

From e463c0b8e0b0d7bf5491bac0290038eef0355814 Mon Sep 17 00:00:00 2001
From: bwd <bdiehl@epic.com>
Date: Fri, 2 May 2025 16:53:08 +0000
Subject: [PATCH 7/7] use pathlib, os-agnostic

---
 changelog/7.bugfix.rst                        |  1 +
 .../prep/data_handler.py                      | 14 ++--
 tests/test_prep.py                            | 64 ++++++++-----------
 3 files changed, 35 insertions(+), 44 deletions(-)

diff --git a/changelog/7.bugfix.rst b/changelog/7.bugfix.rst
index 6a4e64c..33aa613 100644
--- a/changelog/7.bugfix.rst
+++ b/changelog/7.bugfix.rst
@@ -1,2 +1,3 @@
 Update default postproccessing to handle the objects returned by more providers.
 Update the results transform to handle single score objects.
+Update decorator to use pathlib not os
diff --git a/src/evaluation_instruments/prep/data_handler.py b/src/evaluation_instruments/prep/data_handler.py
index 4c5c47d..3a5891a 100644
--- a/src/evaluation_instruments/prep/data_handler.py
+++ b/src/evaluation_instruments/prep/data_handler.py
@@ -1,8 +1,8 @@
 import json
 import logging
-import os
 from functools import wraps
 from typing import Callable, Optional
+from pathlib import Path
 
 logger = logging.getLogger("evaluation")
 
@@ -38,14 +38,18 @@ def wrapped(sample: "namedtuple"):
             """
 
             # Get the file path from the namedtuple using the key
-            filename = getattr(sample, namedtuple_key) + ".json"
+            filename = Path(getattr(sample, namedtuple_key))
+            if not filename:
+                return fn(filename)
+
+            filename = filename.with_suffix(".json")
             if data_path:
-                filename = os.path.join(data_path, filename)
+                filename = Path(data_path) / filename
 
-            if not filename or not os.path.isfile(filename):
+            if not filename.is_file():
                 raw_json = {}
             else:  # Open the file and read its contents
-                with open(filename, "r") as file:
+                with filename.open("r") as file:
                     raw_json = json.load(file)
 
             # Call the original function with the file contents
diff --git a/tests/test_prep.py b/tests/test_prep.py
index c6f4b7b..44d0145 100644
--- a/tests/test_prep.py
+++ b/tests/test_prep.py
@@ -1,6 +1,7 @@
 import json
 from collections import namedtuple
 from unittest.mock import mock_open, patch
+from pathlib import PureWindowsPath, PurePosixPath
 
 import pytest
 
@@ -10,63 +11,48 @@
 class TestJsonFromColumnDecorator:
     """Tests for the json_from_column decorator functionality."""
 
-    def test_basic_functionality(self):
-        """Test basic json_from_column decorator functionality."""
-
-        # Create a test function to decorate
-        @undertest.json_from_column(namedtuple_key="file_id")
-        def test_fn(json_data):
-            return json_data
+    @pytest.mark.parametrize("data_path",[
+                             "data",
+                             "subdir/data",
+                             "subdir\\data",
+                             PurePosixPath("subdir/data"),
+                             PureWindowsPath("subdir\\data"),
+                              ])
+    def test_with_data_path(self, data_path, tmp_path):
+        """Test json_from_column with a data_path."""
+        expected_data = {"testkey": "value"}
+        expected_file = (tmp_path / data_path).with_suffix(".json")
+        (expected_file.parent).mkdir(parents=True, exist_ok=True)
+        with expected_file.open("w") as f:
+            f.write(json.dumps(expected_data))
 
-        # Create a mock sample with file_id
+        # Create a mock sample
         Sample = namedtuple("Sample", ["file_id"])
-        sample = Sample(file_id="test_id")
-
-        # Mock the open function to return a specific JSON
-        test_json = {"key": "value"}
-        m = mock_open(read_data=json.dumps(test_json))
+        sample = Sample(file_id=data_path)
 
-        with patch("builtins.open", m), patch("os.path.isfile", return_value=True):
-            result = test_fn(sample)
-            assert result == test_json
-            m.assert_called_once_with("test_id.json", "r")
-
-    def test_with_data_path(self):
-        """Test json_from_column with a data_path."""
 
+        # Act
         # Create a test function with data path
-        @undertest.json_from_column(namedtuple_key="file_id", data_path="/data")
+        @undertest.json_from_column(namedtuple_key="file_id", data_path=tmp_path)
         def test_fn(json_data):
             return json_data
+        result = test_fn(sample)
 
-        # Create a mock sample
-        Sample = namedtuple("Sample", ["file_id"])
-        sample = Sample(file_id="test_id")
-
-        # Mock the open function
-        test_json = {"key": "value"}
-        m = mock_open(read_data=json.dumps(test_json))
-
-        with patch("builtins.open", m), patch("os.path.isfile", return_value=True), patch(
-            "os.path.join", return_value="/data/test_id.json"
-        ):
-            result = test_fn(sample)
-            assert result == test_json
-            m.assert_called_once_with("/data/test_id.json", "r")
+        # Assert
+        assert result == {"testkey": "value"}
 
     def test_file_not_found(self):
         """Test json_from_column when the file is not found."""
 
-        @undertest.json_from_column(namedtuple_key="file_id")
+        @undertest.json_from_column(namedtuple_key="file_id", data_path="nonexistent_path")
         def test_fn(json_data):
             return json_data
 
         Sample = namedtuple("Sample", ["file_id"])
         sample = Sample(file_id="nonexistent_id")
 
-        with patch("os.path.isfile", return_value=False):
-            result = test_fn(sample)
-            assert result == {}
+        result = test_fn(sample)
+        assert result == {}
 
     def test_missing_key(self):
         """Test json_from_column raises ValueError if namedtuple_key is not provided."""