Skip to content

Commit bbaf171

Browse files
authored
Refactor: Deduplicate normalize_file_path and to_epoch_ms in export script (closes #46) (#51)
* removed duplicated function from `export.py`, added test for normalize file path util function * fix: align windows-drive case expectation in URI test * fix: use lowercase only for drive letter * fix: review comments
1 parent 04d57fc commit bbaf171

2 files changed

Lines changed: 146 additions & 40 deletions

File tree

scripts/export.py

Lines changed: 5 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,11 @@
3030
build_searchable_text,
3131
is_excluded_by_rules,
3232
)
33-
from utils.path_helpers import get_workspace_folder_paths as _shared_get_workspace_folder_paths # noqa: E402
33+
from utils.path_helpers import ( # noqa: E402
34+
get_workspace_folder_paths as _shared_get_workspace_folder_paths,
35+
normalize_file_path,
36+
to_epoch_ms,
37+
)
3438
from utils.tool_parser import parse_tool_call # noqa: E402
3539
from utils.workspace_path import get_cli_chats_path # noqa: E402
3640
from utils.cli_chat_reader import ( # noqa: E402
@@ -141,45 +145,6 @@ def get_global_state_dir() -> str:
141145
return os.path.join(str(Path.home()), ".cursor-chat-browser")
142146

143147

144-
def normalize_file_path(p: str) -> str:
145-
n = re.sub(r"^file:///", "", p or "")
146-
n = re.sub(r"^file://", "", n)
147-
try:
148-
from urllib.parse import unquote
149-
n = unquote(n)
150-
except Exception:
151-
pass
152-
if sys.platform == "win32":
153-
n = n.replace("/", "\\")
154-
n = re.sub(r"^\\([a-zA-Z]:)", r"\1", n)
155-
n = n.lower()
156-
return n
157-
158-
159-
def to_epoch_ms(value) -> int:
160-
"""Convert a timestamp (int, float, or ISO-8601 string) to epoch ms."""
161-
if value is None:
162-
return 0
163-
if isinstance(value, (int, float)):
164-
if value > 1e12:
165-
return int(value)
166-
if value > 0:
167-
return int(value * 1000)
168-
return 0
169-
if isinstance(value, str):
170-
try:
171-
cleaned = value.rstrip("Z") + "+00:00" if value.endswith("Z") else value
172-
dt = datetime.fromisoformat(cleaned)
173-
return int(dt.timestamp() * 1000)
174-
except Exception:
175-
pass
176-
try:
177-
return to_epoch_ms(float(value))
178-
except Exception:
179-
pass
180-
return 0
181-
182-
183148
def slug(s: str) -> str:
184149
s = re.sub(r'[<>:"/\\|?*]', "_", s or "")
185150
s = re.sub(r"\s+", "-", s)

tests/test_normalize_file_path.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""Tests for utils.path_helpers path/timestamp helpers (closes #46).
2+
3+
Covers ``normalize_file_path`` and ``to_epoch_ms``, both previously duplicated
4+
in scripts/export.py. All call-sites in the web app and CLI export script now
5+
use the shared implementations in utils.path_helpers.
6+
7+
Test inventory (this module only): 21 cases — 12 ``normalize_file_path``,
8+
9 ``to_epoch_ms``. On win32, 2 cases skip (POSIX passthrough in
9+
``TestNormalizeFilePathPosixPassthrough`` only). A full-suite run may report
10+
more skips (e.g. ``skipped=4``) from other test modules, not this file.
11+
"""
12+
13+
import sys
14+
import unittest
15+
from datetime import datetime, timezone
16+
17+
from utils.path_helpers import normalize_file_path, to_epoch_ms
18+
19+
20+
class TestNormalizeFilePathUriStripping(unittest.TestCase):
21+
def test_file_triple_slash_stripped(self) -> None:
22+
out = normalize_file_path("file:///home/user/project")
23+
self.assertFalse(out.startswith("file:"))
24+
self.assertIn("home", out)
25+
26+
def test_file_double_slash_stripped(self) -> None:
27+
out = normalize_file_path("file://server/share/file.txt")
28+
self.assertFalse(out.startswith("file:"))
29+
self.assertIn("share", out)
30+
31+
def test_empty_string(self) -> None:
32+
self.assertEqual(normalize_file_path(""), "")
33+
34+
35+
class TestNormalizeFilePathPercentEncoding(unittest.TestCase):
36+
def test_space_decoded(self) -> None:
37+
out = normalize_file_path("file:///C:/My%20Documents/file.txt")
38+
self.assertNotIn("%20", out)
39+
self.assertIn("my documents", out)
40+
41+
def test_hash_decoded(self) -> None:
42+
out = normalize_file_path("file:///C:/repo/src%23internal/mod.py")
43+
self.assertNotIn("%23", out)
44+
self.assertIn("#", out)
45+
46+
def test_percent_encoded_colon_in_uri_prefix(self) -> None:
47+
"""URI-style /d%3A/... path: %3A is decoded to ':'.
48+
49+
Only test that exercises the leading-``/`` + drive-letter shape end-to-end
50+
(Cursor sometimes stores ``/d%3A/...`` URIs). Other drive-path tests use
51+
``D:/...`` or ``D:\\...`` without a leading slash.
52+
53+
On win32 the win32 branch strips the leading slash, lowercases, and
54+
normalises to backslashes. On other platforms the leading ``/`` prevents
55+
the ``^[a-zA-Z]:[/\\]`` cross-platform branch in ``path_helpers`` from matching, so the
56+
path is returned as percent-decoded only (no slash flip / lowercasing).
57+
"""
58+
out = normalize_file_path("/d%3A/_Work/project")
59+
self.assertNotIn("%3A", out)
60+
if sys.platform == "win32":
61+
self.assertEqual(out, r"d:\_work\project")
62+
else:
63+
self.assertEqual(out, "/d:/_Work/project")
64+
65+
66+
class TestNormalizeFilePathWindowsDrives(unittest.TestCase):
67+
"""Paths with Windows-style drive letters are normalised on all platforms.
68+
69+
On win32 the win32 branch handles them natively. On Linux/macOS the
70+
``^[a-zA-Z]:[/\\]`` regex branch converts forward-slashes to backslashes
71+
and lowercases the path so cross-platform reads of Cursor's Windows
72+
workspaceStorage produce consistent keys.
73+
"""
74+
75+
def test_backslash_drive_path_lowercased(self) -> None:
76+
out = normalize_file_path(r"D:\Work\Boost")
77+
self.assertEqual(out, r"d:\work\boost")
78+
79+
def test_forward_slash_drive_path_converted(self) -> None:
80+
out = normalize_file_path("D:/Work/Boost")
81+
self.assertEqual(out, r"d:\work\boost")
82+
83+
def test_file_uri_with_windows_drive(self) -> None:
84+
out = normalize_file_path("file:///C:/Users/Dev/project")
85+
# file:/// stripped, then same drive-letter branch as D:/ and D:\ inputs.
86+
self.assertEqual(out, r"c:\users\dev\project")
87+
88+
def test_mixed_case_drive_lowercased(self) -> None:
89+
out = normalize_file_path(r"E:\Mixed\Case\Path")
90+
self.assertTrue(out.startswith("e:"))
91+
self.assertEqual(out, r"e:\mixed\case\path")
92+
93+
94+
class TestNormalizeFilePathPosixPassthrough(unittest.TestCase):
95+
def test_plain_posix_path_unchanged_on_non_windows(self) -> None:
96+
if sys.platform == "win32":
97+
self.skipTest("POSIX path semantics differ on win32")
98+
out = normalize_file_path("/home/user/project")
99+
self.assertEqual(out, "/home/user/project")
100+
101+
def test_path_without_scheme_unchanged(self) -> None:
102+
if sys.platform == "win32":
103+
self.skipTest("plain relative path behaviour differs on win32")
104+
out = normalize_file_path("relative/path/file.py")
105+
self.assertEqual(out, "relative/path/file.py")
106+
107+
108+
class TestToEpochMs(unittest.TestCase):
109+
def test_none_returns_zero(self) -> None:
110+
self.assertEqual(to_epoch_ms(None), 0)
111+
112+
def test_ms_int_passthrough(self) -> None:
113+
self.assertEqual(to_epoch_ms(1_700_000_000_000), 1_700_000_000_000)
114+
115+
def test_seconds_int_converted_to_ms(self) -> None:
116+
self.assertEqual(to_epoch_ms(1_700_000_000), 1_700_000_000_000)
117+
118+
def test_seconds_float_converted_to_ms(self) -> None:
119+
self.assertEqual(to_epoch_ms(1_700_000_000.5), 1_700_000_000_500)
120+
121+
def test_zero_returns_zero(self) -> None:
122+
self.assertEqual(to_epoch_ms(0), 0)
123+
124+
def test_iso8601_zulu(self) -> None:
125+
expected = int(
126+
datetime(2026, 2, 3, 20, 39, 54, 17_000, tzinfo=timezone.utc).timestamp() * 1000
127+
)
128+
self.assertEqual(to_epoch_ms("2026-02-03T20:39:54.017Z"), expected)
129+
130+
def test_numeric_string_already_ms(self) -> None:
131+
self.assertEqual(to_epoch_ms("1700000000000"), 1_700_000_000_000)
132+
133+
def test_numeric_string_seconds(self) -> None:
134+
self.assertEqual(to_epoch_ms("1700000000"), 1_700_000_000_000)
135+
136+
def test_unrecognised_string_returns_zero(self) -> None:
137+
self.assertEqual(to_epoch_ms("not-a-timestamp"), 0)
138+
139+
140+
if __name__ == "__main__":
141+
unittest.main()

0 commit comments

Comments
 (0)