From a713ef44272359588b46f5b980d1a757bb2bfdbd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 2 Feb 2026 19:50:56 +0500 Subject: [PATCH 01/25] chore(deps): update project deps --- codeclone.baseline.json | 6 +++ tests/test_html_report.py | 44 +++++++++++++++++++ uv.lock | 92 +++++++++++++++++++-------------------- 3 files changed, 96 insertions(+), 46 deletions(-) create mode 100644 codeclone.baseline.json create mode 100644 tests/test_html_report.py diff --git a/codeclone.baseline.json b/codeclone.baseline.json new file mode 100644 index 0000000..a4ac58f --- /dev/null +++ b/codeclone.baseline.json @@ -0,0 +1,6 @@ +{ + "functions": [ + "7d573fa56fb11050f1642f18ca4bb3225e11e194|0-19" + ], + "blocks": [] +} \ No newline at end of file diff --git a/tests/test_html_report.py b/tests/test_html_report.py new file mode 100644 index 0000000..f3e7287 --- /dev/null +++ b/tests/test_html_report.py @@ -0,0 +1,44 @@ +from codeclone.html_report import build_html_report + +def test_html_report_smoke(): + func_groups = { + "hash1": [ + {"qualname": "f1", "filepath": "a.py", "start_line": 1, "end_line": 10}, + {"qualname": "f2", "filepath": "b.py", "start_line": 1, "end_line": 10}, + ] + } + block_groups = {} + + # We need to mock _FileCache or create dummy files because _render_code_block reads files + # Actually _render_code_block reads real files. + # We can create dummy files. + + import pytest + from pathlib import Path + + # Using pytest fixture directly in test function? No, need to pass it. + +def test_html_report_generation(tmp_path): + f1 = tmp_path / "a.py" + f1.write_text("def f1():\n pass\n") + f2 = tmp_path / "b.py" + f2.write_text("def f2():\n pass\n") + + func_groups = { + "hash1": [ + {"qualname": "f1", "filepath": str(f1), "start_line": 1, "end_line": 2}, + {"qualname": "f2", "filepath": str(f2), "start_line": 1, "end_line": 2}, + ] + } + + html = build_html_report( + func_groups=func_groups, + block_groups={}, + title="Test Report" + ) + + assert "" in html + assert "Test Report" in html + assert "f1" in html + assert "f2" in html + assert "svg" in html # Check if SVGs are present diff --git a/uv.lock b/uv.lock index caea671..6f50ae3 100644 --- a/uv.lock +++ b/uv.lock @@ -217,51 +217,51 @@ wheels = [ [[package]] name = "cryptography" -version = "46.0.3" +version = "46.0.4" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/9f/33/c00162f49c0e2fe8064a62cb92b93e50c74a72bc370ab92f86112b33ff62/cryptography-46.0.3.tar.gz", hash = "sha256:a8b17438104fed022ce745b362294d9ce35b4c2e45c1d958ad4a4b019285f4a1", size = 749258, upload-time = "2025-10-15T23:18:31.74Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/1c/67/38769ca6b65f07461eb200e85fc1639b438bdc667be02cf7f2cd6a64601c/cryptography-46.0.3-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:09859af8466b69bc3c27bdf4f5d84a665e0f7ab5088412e9e2ec49758eca5cbc", size = 4296667, upload-time = "2025-10-15T23:16:54.369Z" }, - { url = "https://files.pythonhosted.org/packages/5c/49/498c86566a1d80e978b42f0d702795f69887005548c041636df6ae1ca64c/cryptography-46.0.3-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:01ca9ff2885f3acc98c29f1860552e37f6d7c7d013d7334ff2a9de43a449315d", size = 4450807, upload-time = "2025-10-15T23:16:56.414Z" }, - { url = "https://files.pythonhosted.org/packages/4b/0a/863a3604112174c8624a2ac3c038662d9e59970c7f926acdcfaed8d61142/cryptography-46.0.3-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:6eae65d4c3d33da080cff9c4ab1f711b15c1d9760809dad6ea763f3812d254cb", size = 4299615, upload-time = "2025-10-15T23:16:58.442Z" }, - { url = "https://files.pythonhosted.org/packages/64/02/b73a533f6b64a69f3cd3872acb6ebc12aef924d8d103133bb3ea750dc703/cryptography-46.0.3-cp311-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:e5bf0ed4490068a2e72ac03d786693adeb909981cc596425d09032d372bcc849", size = 4016800, upload-time = "2025-10-15T23:17:00.378Z" }, - { url = "https://files.pythonhosted.org/packages/25/d5/16e41afbfa450cde85a3b7ec599bebefaef16b5c6ba4ec49a3532336ed72/cryptography-46.0.3-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5ecfccd2329e37e9b7112a888e76d9feca2347f12f37918facbb893d7bb88ee8", size = 4984707, upload-time = "2025-10-15T23:17:01.98Z" }, - { url = "https://files.pythonhosted.org/packages/c9/56/e7e69b427c3878352c2fb9b450bd0e19ed552753491d39d7d0a2f5226d41/cryptography-46.0.3-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a2c0cd47381a3229c403062f764160d57d4d175e022c1df84e168c6251a22eec", size = 4482541, upload-time = "2025-10-15T23:17:04.078Z" }, - { url = "https://files.pythonhosted.org/packages/78/f6/50736d40d97e8483172f1bb6e698895b92a223dba513b0ca6f06b2365339/cryptography-46.0.3-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:549e234ff32571b1f4076ac269fcce7a808d3bf98b76c8dd560e42dbc66d7d91", size = 4299464, upload-time = "2025-10-15T23:17:05.483Z" }, - { url = "https://files.pythonhosted.org/packages/00/de/d8e26b1a855f19d9994a19c702fa2e93b0456beccbcfe437eda00e0701f2/cryptography-46.0.3-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:c0a7bb1a68a5d3471880e264621346c48665b3bf1c3759d682fc0864c540bd9e", size = 4950838, upload-time = "2025-10-15T23:17:07.425Z" }, - { url = "https://files.pythonhosted.org/packages/8f/29/798fc4ec461a1c9e9f735f2fc58741b0daae30688f41b2497dcbc9ed1355/cryptography-46.0.3-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:10b01676fc208c3e6feeb25a8b83d81767e8059e1fe86e1dc62d10a3018fa926", size = 4481596, upload-time = "2025-10-15T23:17:09.343Z" }, - { url = "https://files.pythonhosted.org/packages/15/8d/03cd48b20a573adfff7652b76271078e3045b9f49387920e7f1f631d125e/cryptography-46.0.3-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:0abf1ffd6e57c67e92af68330d05760b7b7efb243aab8377e583284dbab72c71", size = 4426782, upload-time = "2025-10-15T23:17:11.22Z" }, - { url = "https://files.pythonhosted.org/packages/fa/b1/ebacbfe53317d55cf33165bda24c86523497a6881f339f9aae5c2e13e57b/cryptography-46.0.3-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a04bee9ab6a4da801eb9b51f1b708a1b5b5c9eb48c03f74198464c66f0d344ac", size = 4698381, upload-time = "2025-10-15T23:17:12.829Z" }, - { url = "https://files.pythonhosted.org/packages/73/dc/9aa866fbdbb95b02e7f9d086f1fccfeebf8953509b87e3f28fff927ff8a0/cryptography-46.0.3-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c8daeb2d2174beb4575b77482320303f3d39b8e81153da4f0fb08eb5fe86a6c5", size = 4288728, upload-time = "2025-10-15T23:17:21.527Z" }, - { url = "https://files.pythonhosted.org/packages/c5/fd/bc1daf8230eaa075184cbbf5f8cd00ba9db4fd32d63fb83da4671b72ed8a/cryptography-46.0.3-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:39b6755623145ad5eff1dab323f4eae2a32a77a7abef2c5089a04a3d04366715", size = 4435078, upload-time = "2025-10-15T23:17:23.042Z" }, - { url = "https://files.pythonhosted.org/packages/82/98/d3bd5407ce4c60017f8ff9e63ffee4200ab3e23fe05b765cab805a7db008/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:db391fa7c66df6762ee3f00c95a89e6d428f4d60e7abc8328f4fe155b5ac6e54", size = 4293460, upload-time = "2025-10-15T23:17:24.885Z" }, - { url = "https://files.pythonhosted.org/packages/26/e9/e23e7900983c2b8af7a08098db406cf989d7f09caea7897e347598d4cd5b/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:78a97cf6a8839a48c49271cdcbd5cf37ca2c1d6b7fdd86cc864f302b5e9bf459", size = 3995237, upload-time = "2025-10-15T23:17:26.449Z" }, - { url = "https://files.pythonhosted.org/packages/91/15/af68c509d4a138cfe299d0d7ddb14afba15233223ebd933b4bbdbc7155d3/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:dfb781ff7eaa91a6f7fd41776ec37c5853c795d3b358d4896fdbb5df168af422", size = 4967344, upload-time = "2025-10-15T23:17:28.06Z" }, - { url = "https://files.pythonhosted.org/packages/ca/e3/8643d077c53868b681af077edf6b3cb58288b5423610f21c62aadcbe99f4/cryptography-46.0.3-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:6f61efb26e76c45c4a227835ddeae96d83624fb0d29eb5df5b96e14ed1a0afb7", size = 4466564, upload-time = "2025-10-15T23:17:29.665Z" }, - { url = "https://files.pythonhosted.org/packages/0e/43/c1e8726fa59c236ff477ff2b5dc071e54b21e5a1e51aa2cee1676f1c986f/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:23b1a8f26e43f47ceb6d6a43115f33a5a37d57df4ea0ca295b780ae8546e8044", size = 4292415, upload-time = "2025-10-15T23:17:31.686Z" }, - { url = "https://files.pythonhosted.org/packages/42/f9/2f8fefdb1aee8a8e3256a0568cffc4e6d517b256a2fe97a029b3f1b9fe7e/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:b419ae593c86b87014b9be7396b385491ad7f320bde96826d0dd174459e54665", size = 4931457, upload-time = "2025-10-15T23:17:33.478Z" }, - { url = "https://files.pythonhosted.org/packages/79/30/9b54127a9a778ccd6d27c3da7563e9f2d341826075ceab89ae3b41bf5be2/cryptography-46.0.3-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:50fc3343ac490c6b08c0cf0d704e881d0d660be923fd3076db3e932007e726e3", size = 4466074, upload-time = "2025-10-15T23:17:35.158Z" }, - { url = "https://files.pythonhosted.org/packages/ac/68/b4f4a10928e26c941b1b6a179143af9f4d27d88fe84a6a3c53592d2e76bf/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:22d7e97932f511d6b0b04f2bfd818d73dcd5928db509460aaf48384778eb6d20", size = 4420569, upload-time = "2025-10-15T23:17:37.188Z" }, - { url = "https://files.pythonhosted.org/packages/a3/49/3746dab4c0d1979888f125226357d3262a6dd40e114ac29e3d2abdf1ec55/cryptography-46.0.3-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:d55f3dffadd674514ad19451161118fd010988540cee43d8bc20675e775925de", size = 4681941, upload-time = "2025-10-15T23:17:39.236Z" }, - { url = "https://files.pythonhosted.org/packages/27/32/b68d27471372737054cbd34c84981f9edbc24fe67ca225d389799614e27f/cryptography-46.0.3-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:4b7387121ac7d15e550f5cb4a43aef2559ed759c35df7336c402bb8275ac9683", size = 4294089, upload-time = "2025-10-15T23:17:48.269Z" }, - { url = "https://files.pythonhosted.org/packages/26/42/fa8389d4478368743e24e61eea78846a0006caffaf72ea24a15159215a14/cryptography-46.0.3-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:15ab9b093e8f09daab0f2159bb7e47532596075139dd74365da52ecc9cb46c5d", size = 4440029, upload-time = "2025-10-15T23:17:49.837Z" }, - { url = "https://files.pythonhosted.org/packages/5f/eb/f483db0ec5ac040824f269e93dd2bd8a21ecd1027e77ad7bdf6914f2fd80/cryptography-46.0.3-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:46acf53b40ea38f9c6c229599a4a13f0d46a6c3fa9ef19fc1a124d62e338dfa0", size = 4297222, upload-time = "2025-10-15T23:17:51.357Z" }, - { url = "https://files.pythonhosted.org/packages/fd/cf/da9502c4e1912cb1da3807ea3618a6829bee8207456fbbeebc361ec38ba3/cryptography-46.0.3-cp38-abi3-manylinux_2_28_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:10ca84c4668d066a9878890047f03546f3ae0a6b8b39b697457b7757aaf18dbc", size = 4012280, upload-time = "2025-10-15T23:17:52.964Z" }, - { url = "https://files.pythonhosted.org/packages/6b/8f/9adb86b93330e0df8b3dcf03eae67c33ba89958fc2e03862ef1ac2b42465/cryptography-46.0.3-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:36e627112085bb3b81b19fed209c05ce2a52ee8b15d161b7c643a7d5a88491f3", size = 4978958, upload-time = "2025-10-15T23:17:54.965Z" }, - { url = "https://files.pythonhosted.org/packages/d1/a0/5fa77988289c34bdb9f913f5606ecc9ada1adb5ae870bd0d1054a7021cc4/cryptography-46.0.3-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1000713389b75c449a6e979ffc7dcc8ac90b437048766cef052d4d30b8220971", size = 4473714, upload-time = "2025-10-15T23:17:56.754Z" }, - { url = "https://files.pythonhosted.org/packages/14/e5/fc82d72a58d41c393697aa18c9abe5ae1214ff6f2a5c18ac470f92777895/cryptography-46.0.3-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:b02cf04496f6576afffef5ddd04a0cb7d49cf6be16a9059d793a30b035f6b6ac", size = 4296970, upload-time = "2025-10-15T23:17:58.588Z" }, - { url = "https://files.pythonhosted.org/packages/78/06/5663ed35438d0b09056973994f1aec467492b33bd31da36e468b01ec1097/cryptography-46.0.3-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:71e842ec9bc7abf543b47cf86b9a743baa95f4677d22baa4c7d5c69e49e9bc04", size = 4940236, upload-time = "2025-10-15T23:18:00.897Z" }, - { url = "https://files.pythonhosted.org/packages/fc/59/873633f3f2dcd8a053b8dd1d38f783043b5fce589c0f6988bf55ef57e43e/cryptography-46.0.3-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:402b58fc32614f00980b66d6e56a5b4118e6cb362ae8f3fda141ba4689bd4506", size = 4472642, upload-time = "2025-10-15T23:18:02.749Z" }, - { url = "https://files.pythonhosted.org/packages/3d/39/8e71f3930e40f6877737d6f69248cf74d4e34b886a3967d32f919cc50d3b/cryptography-46.0.3-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:ef639cb3372f69ec44915fafcd6698b6cc78fbe0c2ea41be867f6ed612811963", size = 4423126, upload-time = "2025-10-15T23:18:04.85Z" }, - { url = "https://files.pythonhosted.org/packages/cd/c7/f65027c2810e14c3e7268353b1681932b87e5a48e65505d8cc17c99e36ae/cryptography-46.0.3-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:3b51b8ca4f1c6453d8829e1eb7299499ca7f313900dd4d89a24b8b87c0a780d4", size = 4686573, upload-time = "2025-10-15T23:18:06.908Z" }, - { url = "https://files.pythonhosted.org/packages/da/38/f59940ec4ee91e93d3311f7532671a5cef5570eb04a144bf203b58552d11/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:191bb60a7be5e6f54e30ba16fdfae78ad3a342a0599eb4193ba88e3f3d6e185b", size = 4243992, upload-time = "2025-10-15T23:18:18.695Z" }, - { url = "https://files.pythonhosted.org/packages/b0/0c/35b3d92ddebfdfda76bb485738306545817253d0a3ded0bfe80ef8e67aa5/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:c70cc23f12726be8f8bc72e41d5065d77e4515efae3690326764ea1b07845cfb", size = 4409944, upload-time = "2025-10-15T23:18:20.597Z" }, - { url = "https://files.pythonhosted.org/packages/99/55/181022996c4063fc0e7666a47049a1ca705abb9c8a13830f074edb347495/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:9394673a9f4de09e28b5356e7fff97d778f8abad85c9d5ac4a4b7e25a0de7717", size = 4242957, upload-time = "2025-10-15T23:18:22.18Z" }, - { url = "https://files.pythonhosted.org/packages/ba/af/72cd6ef29f9c5f731251acadaeb821559fe25f10852f44a63374c9ca08c1/cryptography-46.0.3-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:94cd0549accc38d1494e1f8de71eca837d0509d0d44bf11d158524b0e12cebf9", size = 4409447, upload-time = "2025-10-15T23:18:24.209Z" }, +sdist = { url = "https://files.pythonhosted.org/packages/78/19/f748958276519adf6a0c1e79e7b8860b4830dda55ccdf29f2719b5fc499c/cryptography-46.0.4.tar.gz", hash = "sha256:bfd019f60f8abc2ed1b9be4ddc21cfef059c841d86d710bb69909a688cbb8f59", size = 749301, upload-time = "2026-01-28T00:24:37.379Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/87/91/874b8910903159043b5c6a123b7e79c4559ddd1896e38967567942635778/cryptography-46.0.4-cp311-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5f14fba5bf6f4390d7ff8f086c566454bff0411f6d8aa7af79c88b6f9267aecc", size = 4275871, upload-time = "2026-01-28T00:23:09.439Z" }, + { url = "https://files.pythonhosted.org/packages/c0/35/690e809be77896111f5b195ede56e4b4ed0435b428c2f2b6d35046fbb5e8/cryptography-46.0.4-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:47bcd19517e6389132f76e2d5303ded6cf3f78903da2158a671be8de024f4cd0", size = 4423124, upload-time = "2026-01-28T00:23:11.529Z" }, + { url = "https://files.pythonhosted.org/packages/1a/5b/a26407d4f79d61ca4bebaa9213feafdd8806dc69d3d290ce24996d3cfe43/cryptography-46.0.4-cp311-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:01df4f50f314fbe7009f54046e908d1754f19d0c6d3070df1e6268c5a4af09fa", size = 4277090, upload-time = "2026-01-28T00:23:13.123Z" }, + { url = "https://files.pythonhosted.org/packages/0c/d8/4bb7aec442a9049827aa34cee1aa83803e528fa55da9a9d45d01d1bb933e/cryptography-46.0.4-cp311-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:5aa3e463596b0087b3da0dbe2b2487e9fc261d25da85754e30e3b40637d61f81", size = 4947652, upload-time = "2026-01-28T00:23:14.554Z" }, + { url = "https://files.pythonhosted.org/packages/2b/08/f83e2e0814248b844265802d081f2fac2f1cbe6cd258e72ba14ff006823a/cryptography-46.0.4-cp311-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:0a9ad24359fee86f131836a9ac3bffc9329e956624a2d379b613f8f8abaf5255", size = 4455157, upload-time = "2026-01-28T00:23:16.443Z" }, + { url = "https://files.pythonhosted.org/packages/0a/05/19d849cf4096448779d2dcc9bb27d097457dac36f7273ffa875a93b5884c/cryptography-46.0.4-cp311-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:dc1272e25ef673efe72f2096e92ae39dea1a1a450dd44918b15351f72c5a168e", size = 3981078, upload-time = "2026-01-28T00:23:17.838Z" }, + { url = "https://files.pythonhosted.org/packages/e6/89/f7bac81d66ba7cde867a743ea5b37537b32b5c633c473002b26a226f703f/cryptography-46.0.4-cp311-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:de0f5f4ec8711ebc555f54735d4c673fc34b65c44283895f1a08c2b49d2fd99c", size = 4276213, upload-time = "2026-01-28T00:23:19.257Z" }, + { url = "https://files.pythonhosted.org/packages/da/9f/7133e41f24edd827020ad21b068736e792bc68eecf66d93c924ad4719fb3/cryptography-46.0.4-cp311-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:eeeb2e33d8dbcccc34d64651f00a98cb41b2dc69cef866771a5717e6734dfa32", size = 4912190, upload-time = "2026-01-28T00:23:21.244Z" }, + { url = "https://files.pythonhosted.org/packages/a6/f7/6d43cbaddf6f65b24816e4af187d211f0bc536a29961f69faedc48501d8e/cryptography-46.0.4-cp311-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:3d425eacbc9aceafd2cb429e42f4e5d5633c6f873f5e567077043ef1b9bbf616", size = 4454641, upload-time = "2026-01-28T00:23:22.866Z" }, + { url = "https://files.pythonhosted.org/packages/9e/4f/ebd0473ad656a0ac912a16bd07db0f5d85184924e14fc88feecae2492834/cryptography-46.0.4-cp311-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:91627ebf691d1ea3976a031b61fb7bac1ccd745afa03602275dda443e11c8de0", size = 4405159, upload-time = "2026-01-28T00:23:25.278Z" }, + { url = "https://files.pythonhosted.org/packages/d1/f7/7923886f32dc47e27adeff8246e976d77258fd2aa3efdd1754e4e323bf49/cryptography-46.0.4-cp311-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:2d08bc22efd73e8854b0b7caff402d735b354862f1145d7be3b9c0f740fef6a0", size = 4666059, upload-time = "2026-01-28T00:23:26.766Z" }, + { url = "https://files.pythonhosted.org/packages/f8/f5/559c25b77f40b6bf828eabaf988efb8b0e17b573545edb503368ca0a2a03/cryptography-46.0.4-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:078e5f06bd2fa5aea5a324f2a09f914b1484f1d0c2a4d6a8a28c74e72f65f2da", size = 4264508, upload-time = "2026-01-28T00:23:34.264Z" }, + { url = "https://files.pythonhosted.org/packages/49/a1/551fa162d33074b660dc35c9bc3616fefa21a0e8c1edd27b92559902e408/cryptography-46.0.4-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:dce1e4f068f03008da7fa51cc7abc6ddc5e5de3e3d1550334eaf8393982a5829", size = 4409080, upload-time = "2026-01-28T00:23:35.793Z" }, + { url = "https://files.pythonhosted.org/packages/b0/6a/4d8d129a755f5d6df1bbee69ea2f35ebfa954fa1847690d1db2e8bca46a5/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:2067461c80271f422ee7bdbe79b9b4be54a5162e90345f86a23445a0cf3fd8a2", size = 4270039, upload-time = "2026-01-28T00:23:37.263Z" }, + { url = "https://files.pythonhosted.org/packages/4c/f5/ed3fcddd0a5e39321e595e144615399e47e7c153a1fb8c4862aec3151ff9/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_ppc64le.whl", hash = "sha256:c92010b58a51196a5f41c3795190203ac52edfd5dc3ff99149b4659eba9d2085", size = 4926748, upload-time = "2026-01-28T00:23:38.884Z" }, + { url = "https://files.pythonhosted.org/packages/43/ae/9f03d5f0c0c00e85ecb34f06d3b79599f20630e4db91b8a6e56e8f83d410/cryptography-46.0.4-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:829c2b12bbc5428ab02d6b7f7e9bbfd53e33efd6672d21341f2177470171ad8b", size = 4442307, upload-time = "2026-01-28T00:23:40.56Z" }, + { url = "https://files.pythonhosted.org/packages/8b/22/e0f9f2dae8040695103369cf2283ef9ac8abe4d51f68710bec2afd232609/cryptography-46.0.4-cp314-cp314t-manylinux_2_31_armv7l.whl", hash = "sha256:62217ba44bf81b30abaeda1488686a04a702a261e26f87db51ff61d9d3510abd", size = 3959253, upload-time = "2026-01-28T00:23:42.827Z" }, + { url = "https://files.pythonhosted.org/packages/01/5b/6a43fcccc51dae4d101ac7d378a8724d1ba3de628a24e11bf2f4f43cba4d/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_aarch64.whl", hash = "sha256:9c2da296c8d3415b93e6053f5a728649a87a48ce084a9aaf51d6e46c87c7f2d2", size = 4269372, upload-time = "2026-01-28T00:23:44.655Z" }, + { url = "https://files.pythonhosted.org/packages/17/b7/0f6b8c1dd0779df2b526e78978ff00462355e31c0a6f6cff8a3e99889c90/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_ppc64le.whl", hash = "sha256:9b34d8ba84454641a6bf4d6762d15847ecbd85c1316c0a7984e6e4e9f748ec2e", size = 4891908, upload-time = "2026-01-28T00:23:46.48Z" }, + { url = "https://files.pythonhosted.org/packages/83/17/259409b8349aa10535358807a472c6a695cf84f106022268d31cea2b6c97/cryptography-46.0.4-cp314-cp314t-manylinux_2_34_x86_64.whl", hash = "sha256:df4a817fa7138dd0c96c8c8c20f04b8aaa1fac3bbf610913dcad8ea82e1bfd3f", size = 4441254, upload-time = "2026-01-28T00:23:48.403Z" }, + { url = "https://files.pythonhosted.org/packages/9c/fe/e4a1b0c989b00cee5ffa0764401767e2d1cf59f45530963b894129fd5dce/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:b1de0ebf7587f28f9190b9cb526e901bf448c9e6a99655d2b07fff60e8212a82", size = 4396520, upload-time = "2026-01-28T00:23:50.26Z" }, + { url = "https://files.pythonhosted.org/packages/b3/81/ba8fd9657d27076eb40d6a2f941b23429a3c3d2f56f5a921d6b936a27bc9/cryptography-46.0.4-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:9b4d17bc7bd7cdd98e3af40b441feaea4c68225e2eb2341026c84511ad246c0c", size = 4651479, upload-time = "2026-01-28T00:23:51.674Z" }, + { url = "https://files.pythonhosted.org/packages/d8/cc/8f3224cbb2a928de7298d6ed4790f5ebc48114e02bdc9559196bfb12435d/cryptography-46.0.4-cp38-abi3-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8bf75b0259e87fa70bddc0b8b4078b76e7fd512fd9afae6c1193bcf440a4dbef", size = 4275419, upload-time = "2026-01-28T00:23:58.364Z" }, + { url = "https://files.pythonhosted.org/packages/17/43/4a18faa7a872d00e4264855134ba82d23546c850a70ff209e04ee200e76f/cryptography-46.0.4-cp38-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:3c268a3490df22270955966ba236d6bc4a8f9b6e4ffddb78aac535f1a5ea471d", size = 4419058, upload-time = "2026-01-28T00:23:59.867Z" }, + { url = "https://files.pythonhosted.org/packages/ee/64/6651969409821d791ba12346a124f55e1b76f66a819254ae840a965d4b9c/cryptography-46.0.4-cp38-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:812815182f6a0c1d49a37893a303b44eaac827d7f0d582cecfc81b6427f22973", size = 4278151, upload-time = "2026-01-28T00:24:01.731Z" }, + { url = "https://files.pythonhosted.org/packages/20/0b/a7fce65ee08c3c02f7a8310cc090a732344066b990ac63a9dfd0a655d321/cryptography-46.0.4-cp38-abi3-manylinux_2_28_ppc64le.whl", hash = "sha256:a90e43e3ef65e6dcf969dfe3bb40cbf5aef0d523dff95bfa24256be172a845f4", size = 4939441, upload-time = "2026-01-28T00:24:03.175Z" }, + { url = "https://files.pythonhosted.org/packages/db/a7/20c5701e2cd3e1dfd7a19d2290c522a5f435dd30957d431dcb531d0f1413/cryptography-46.0.4-cp38-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a05177ff6296644ef2876fce50518dffb5bcdf903c85250974fc8bc85d54c0af", size = 4451617, upload-time = "2026-01-28T00:24:05.403Z" }, + { url = "https://files.pythonhosted.org/packages/00/dc/3e16030ea9aa47b63af6524c354933b4fb0e352257c792c4deeb0edae367/cryptography-46.0.4-cp38-abi3-manylinux_2_31_armv7l.whl", hash = "sha256:daa392191f626d50f1b136c9b4cf08af69ca8279d110ea24f5c2700054d2e263", size = 3977774, upload-time = "2026-01-28T00:24:06.851Z" }, + { url = "https://files.pythonhosted.org/packages/42/c8/ad93f14118252717b465880368721c963975ac4b941b7ef88f3c56bf2897/cryptography-46.0.4-cp38-abi3-manylinux_2_34_aarch64.whl", hash = "sha256:e07ea39c5b048e085f15923511d8121e4a9dc45cee4e3b970ca4f0d338f23095", size = 4277008, upload-time = "2026-01-28T00:24:08.926Z" }, + { url = "https://files.pythonhosted.org/packages/00/cf/89c99698151c00a4631fbfcfcf459d308213ac29e321b0ff44ceeeac82f1/cryptography-46.0.4-cp38-abi3-manylinux_2_34_ppc64le.whl", hash = "sha256:d5a45ddc256f492ce42a4e35879c5e5528c09cd9ad12420828c972951d8e016b", size = 4903339, upload-time = "2026-01-28T00:24:12.009Z" }, + { url = "https://files.pythonhosted.org/packages/03/c3/c90a2cb358de4ac9309b26acf49b2a100957e1ff5cc1e98e6c4996576710/cryptography-46.0.4-cp38-abi3-manylinux_2_34_x86_64.whl", hash = "sha256:6bb5157bf6a350e5b28aee23beb2d84ae6f5be390b2f8ee7ea179cda077e1019", size = 4451216, upload-time = "2026-01-28T00:24:13.975Z" }, + { url = "https://files.pythonhosted.org/packages/96/2c/8d7f4171388a10208671e181ca43cdc0e596d8259ebacbbcfbd16de593da/cryptography-46.0.4-cp38-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:dd5aba870a2c40f87a3af043e0dee7d9eb02d4aff88a797b48f2b43eff8c3ab4", size = 4404299, upload-time = "2026-01-28T00:24:16.169Z" }, + { url = "https://files.pythonhosted.org/packages/e9/23/cbb2036e450980f65c6e0a173b73a56ff3bccd8998965dea5cc9ddd424a5/cryptography-46.0.4-cp38-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:93d8291da8d71024379ab2cb0b5c57915300155ad42e07f76bea6ad838d7e59b", size = 4664837, upload-time = "2026-01-28T00:24:17.629Z" }, + { url = "https://files.pythonhosted.org/packages/27/7a/f8d2d13227a9a1a9fe9c7442b057efecffa41f1e3c51d8622f26b9edbe8f/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:c236a44acfb610e70f6b3e1c3ca20ff24459659231ef2f8c48e879e2d32b73da", size = 4216693, upload-time = "2026-01-28T00:24:25.758Z" }, + { url = "https://files.pythonhosted.org/packages/c5/de/3787054e8f7972658370198753835d9d680f6cd4a39df9f877b57f0dd69c/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8a15fb869670efa8f83cbffbc8753c1abf236883225aed74cd179b720ac9ec80", size = 4382765, upload-time = "2026-01-28T00:24:27.577Z" }, + { url = "https://files.pythonhosted.org/packages/8a/5f/60e0afb019973ba6a0b322e86b3d61edf487a4f5597618a430a2a15f2d22/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_aarch64.whl", hash = "sha256:fdc3daab53b212472f1524d070735b2f0c214239df131903bae1d598016fa822", size = 4216066, upload-time = "2026-01-28T00:24:29.056Z" }, + { url = "https://files.pythonhosted.org/packages/81/8e/bf4a0de294f147fee66f879d9bae6f8e8d61515558e3d12785dd90eca0be/cryptography-46.0.4-pp311-pypy311_pp73-manylinux_2_34_x86_64.whl", hash = "sha256:44cc0675b27cadb71bdbb96099cca1fa051cd11d2ade09e5cd3a2edb929ed947", size = 4382025, upload-time = "2026-01-28T00:24:30.681Z" }, ] [[package]] @@ -583,11 +583,11 @@ wheels = [ [[package]] name = "packaging" -version = "25.0" +version = "26.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/a1/d4/1fc4078c65507b51b96ca8f8c3ba19e6a61c8253c72794544580a7b6c24d/packaging-25.0.tar.gz", hash = "sha256:d443872c98d677bf60f6a1f2f8c1cb748e8fe762d2bf9d3148b5599295b0fc4f", size = 165727, upload-time = "2025-04-19T11:48:59.673Z" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/20/12/38679034af332785aac8774540895e234f4d07f7545804097de4b666afd8/packaging-25.0-py3-none-any.whl", hash = "sha256:29572ef2b1f17581046b3a2227d5c611fb25ec70ca1ba8554b24b0e69331a484", size = 66469, upload-time = "2025-04-19T11:48:57.875Z" }, + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, ] [[package]] @@ -610,11 +610,11 @@ wheels = [ [[package]] name = "pycparser" -version = "2.23" +version = "3.0" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/fe/cf/d2d3b9f5699fb1e4615c8e32ff220203e43b248e1dfcc6736ad9057731ca/pycparser-2.23.tar.gz", hash = "sha256:78816d4f24add8f10a06d6f05b4d424ad9e96cfebf68a4ddc99c65c0720d00c2", size = 173734, upload-time = "2025-09-09T13:23:47.91Z" } +sdist = { url = "https://files.pythonhosted.org/packages/1b/7d/92392ff7815c21062bea51aa7b87d45576f649f16458d78b7cf94b9ab2e6/pycparser-3.0.tar.gz", hash = "sha256:600f49d217304a5902ac3c37e1281c9fe94e4d0489de643a9504c5cdfdfc6b29", size = 103492, upload-time = "2026-01-21T14:26:51.89Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/e3/59cd50310fc9b59512193629e1984c1f95e5c8ae6e5d8c69532ccc65a7fe/pycparser-2.23-py3-none-any.whl", hash = "sha256:e5c6e8d3fbad53479cab09ac03729e0a9faf2bee3db8208a550daf5af81a5934", size = 118140, upload-time = "2025-09-09T13:23:46.651Z" }, + { url = "https://files.pythonhosted.org/packages/0c/c3/44f3fbbfa403ea2a7c779186dc20772604442dde72947e7d01069cbe98e3/pycparser-3.0-py3-none-any.whl", hash = "sha256:b727414169a36b7d524c1c3e31839a521725078d7b2ff038656844266160a992", size = 48172, upload-time = "2026-01-21T14:26:50.693Z" }, ] [[package]] From b2bc5acefffd988a46785930e21879049430f6a6 Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 2 Feb 2026 19:51:40 +0500 Subject: [PATCH 02/25] fix(core): Fixed the alignment and layout of report elements --- codeclone/html_report.py | 125 +++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 32 deletions(-) diff --git a/codeclone/html_report.py b/codeclone/html_report.py index ac7d762..440882d 100644 --- a/codeclone/html_report.py +++ b/codeclone/html_report.py @@ -105,9 +105,9 @@ def _prefix_css(css: str, prefix: str) -> str: out_lines.append(line) continue if ( - stripped.startswith("/*") - or stripped.startswith("*") - or stripped.startswith("*/") + stripped.startswith("/*") + or stripped.startswith("*") + or stripped.startswith("*/") ): out_lines.append(line) continue @@ -126,13 +126,13 @@ def _prefix_css(css: str, prefix: str) -> str: def _render_code_block( - *, - filepath: str, - start_line: int, - end_line: int, - file_cache: _FileCache, - context: int, - max_lines: int, + *, + filepath: str, + start_line: int, + end_line: int, + file_cache: _FileCache, + context: int, + max_lines: int, ) -> _Snippet: lines = file_cache.get_lines(filepath) @@ -164,13 +164,13 @@ def _render_code_block( filepath=filepath, start_line=start_line, end_line=end_line, - code_html=f'
{body}
', + code_html=f'
{body}
', ) -# ============================ +# ============================ # HTML report builder -# ============================ +# ============================ def _escape(v: Any) -> str: @@ -354,13 +354,43 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: justify-content: space-between; align-items: center; gap: 16px; - flex-wrap: wrap; padding: 12px; background: var(--panel); border: 1px solid var(--border); border-radius: 6px; } +.toolbar-left { + display: flex; + align-items: center; + gap: 12px; + flex: 1; +} + +.toolbar-right { + display: flex; + align-items: center; + gap: 12px; +} + +@media (max-width: 768px) { + .section-toolbar { + flex-direction: column; + align-items: stretch; + } + + .toolbar-left, + .toolbar-right { + width: 100%; + justify-content: space-between; + } + + .search-wrap { + min-width: 0; + flex: 1; + } +} + .search-wrap { display: flex; align-items: center; @@ -416,6 +446,14 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: font-size: 13px; } +.page-meta { + color: var(--text); + font-size: 13px; + white-space: nowrap; + min-width: 80px; + text-align: center; +} + .pill { padding: 2px 10px; border-radius: 99px; @@ -503,12 +541,13 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: grid-template-columns: 1fr 1fr; gap: 16px; margin-bottom: 16px; + min-width: 0; /* Allow grid items to shrink */ } .item-pair:last-child { margin-bottom: 0; } -@media (max-width: 1000px) { +@media (max-width: 1200px) { .item-pair { grid-template-columns: 1fr; } @@ -520,6 +559,7 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: overflow: hidden; display: flex; flex-direction: column; + min-width: 0; /* Allow flex items to shrink below content size */ } .item-head { @@ -542,13 +582,34 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: .codebox { margin: 0; - padding: 12px; + padding: 0; font-family: var(--mono); font-size: 12px; line-height: 1.5; - overflow: auto; + overflow-x: auto; + overflow-y: auto; background: var(--bg); flex: 1; + max-width: 100%; + max-height: 600px; +} + +.codebox pre { + margin: 0; + padding: 12px; + white-space: pre; + word-wrap: normal; + overflow-wrap: normal; + min-width: max-content; +} + +.codebox code { + display: block; + white-space: pre; + word-wrap: normal; + overflow-wrap: normal; + font-family: inherit; + font-size: inherit; } .empty { @@ -754,12 +815,12 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: def build_html_report( - *, - func_groups: dict[str, list[dict[str, Any]]], - block_groups: dict[str, list[dict[str, Any]]], - title: str = "CodeClone Report", - context_lines: int = 3, - max_snippet_lines: int = 220, + *, + func_groups: dict[str, list[dict[str, Any]]], + block_groups: dict[str, list[dict[str, Any]]], + title: str = "CodeClone Report", + context_lines: int = 3, + max_snippet_lines: int = 220, ) -> str: file_cache = _FileCache() @@ -780,9 +841,9 @@ def build_html_report( pyg_dark = _prefix_css(pyg_dark_raw, "html[data-theme='dark']") pyg_light = _prefix_css(pyg_light_raw, "html[data-theme='light']") - # ============================ + # ============================ # Icons (Inline SVG) - # ============================ + # ============================ ICON_SEARCH = '' ICON_X = '' ICON_CHEV_DOWN = '' @@ -792,15 +853,15 @@ def build_html_report( ICON_PREV = '' ICON_NEXT = '' - # ---------------------------- + # ---------------------------- # Section renderer # ---------------------------- def render_section( - section_id: str, - section_title: str, - groups: list[tuple[str, list[dict[str, Any]]]], - pill_cls: str, + section_id: str, + section_title: str, + groups: list[tuple[str, list[dict[str, Any]]]], + pill_cls: str, ) -> str: if not groups: return "" @@ -904,9 +965,9 @@ def render_section( out.append("") return "\n".join(out) - # ============================ + # ============================ # HTML Rendering - # ============================ + # ============================ empty_state_html = "" if not has_any: From 2313138f57929b1ea5de358bc983e51da84b42ef Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 2 Feb 2026 19:54:19 +0500 Subject: [PATCH 03/25] feat(core): Bump version to 1.2.1 --- codeclone/cli.py | 2 +- pyproject.toml | 2 +- uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/codeclone/cli.py b/codeclone/cli.py index 250a278..f77bc61 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -80,7 +80,7 @@ def process_file( def print_banner(): console.print( Panel.fit( - "[bold white]CodeClone[/bold white] [dim]v1.2.0[/dim]\n" + "[bold white]CodeClone[/bold white] [dim]v1.2.1[/dim]\n" "[italic]Architectural duplication detector[/italic]", border_style="blue", padding=(0, 2), diff --git a/pyproject.toml b/pyproject.toml index f2ee6c0..e5fa873 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "codeclone" -version = "1.2.0" +version = "1.2.1" description = "AST and CFG-based code clone detector for Python focused on architectural duplication" readme = { file = "README.md", content-type = "text/markdown" } license = { text = "MIT" } diff --git a/uv.lock b/uv.lock index 6f50ae3..d10705b 100644 --- a/uv.lock +++ b/uv.lock @@ -180,7 +180,7 @@ wheels = [ [[package]] name = "codeclone" -version = "1.2.0" +version = "1.2.1" source = { editable = "." } dependencies = [ { name = "pygments" }, From afad36a2cfe71d491f3c0dd14885c300a76a6cdd Mon Sep 17 00:00:00 2001 From: Den Rozhnovskiy Date: Mon, 2 Feb 2026 23:12:18 +0500 Subject: [PATCH 04/25] fix(audit): Security & Robustness fixes - Fix audit findings: add AST parsing limits/timeout guards, expand sensitive dir denylist, optimize HTML report snippet reads, and warn on cache signature/version mismatches. - Refactor CLI error handling + batching; add clear fallback warnings for parallel processing; tighten typing around cache stats. - Apply slots across classes where appropriate, and clean up formatting for Ruff compliance. - Update CHANGELOG.md for 1.2.1 with security, performance, and test coverage notes. --- CHANGELOG.md | 103 +++++ codeclone.baseline.json | 6 - codeclone/__init__.py | 2 +- codeclone/baseline.py | 13 +- codeclone/blockhash.py | 2 +- codeclone/blocks.py | 7 +- codeclone/cache.py | 171 +++++++- codeclone/cfg.py | 181 +++----- codeclone/cli.py | 402 ++++++++++++----- codeclone/extractor.py | 125 +++++- codeclone/html_report.py | 918 +++++++++------------------------------ codeclone/normalize.py | 35 +- codeclone/report.py | 35 +- codeclone/scanner.py | 65 ++- pyproject.toml | 25 +- uv.lock | 148 +++++++ 16 files changed, 1198 insertions(+), 1040 deletions(-) delete mode 100644 codeclone.baseline.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 334d6fa..097db53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,108 @@ # Changelog +## [1.2.1] - 2026-02-XX + +## Overview + +This release focuses on security hardening, robustness, and long-term maintainability. +No breaking API changes were introduced. + +The goal of this release is to provide users with a safe, deterministic, and CI-friendly +tool suitable for security-sensitive and large-scale environments. + +--- + +## Security & Robustness + +- **Path Traversal Protection** + Implemented strict path validation to prevent scanning outside the project root or + accessing sensitive system directories, including macOS `/private` paths. + +- **Cache Integrity Protection** + Added HMAC-SHA256 signing for cache files to prevent cache poisoning and detect tampering. + +- **Parser Safety Limits** + Introduced AST parsing time limits to mitigate risks from pathological or adversarial inputs. + +- **Resource Exhaustion Protection** + Enforced a maximum file size limit (10MB) and a maximum file count per scan to prevent + excessive memory or CPU usage. + +- **Structured Error Handling** + Introduced a dedicated exception hierarchy (`ParseError`, `CacheError`, etc.) and replaced + broad exception handling with graceful, user-friendly failure reporting. + +--- + +## Performance Improvements + +- **Optimized AST Normalization** + Replaced expensive `deepcopy` operations with in-place AST normalization, significantly + reducing CPU and memory overhead. + +- **Improved Memory Efficiency** + Added an LRU cache for file reading and optimized string concatenation during fingerprint + generation. + +- **HTML Report Memory Bounds** + HTML reports now read only the required line ranges instead of entire files, reducing peak + memory usage on large codebases. + +--- + +## Architecture & Maintainability + +- **Strict Type Safety** + Migrated all optional typing to Python 3.10+ `| None` syntax and achieved 100% `mypy` strict + compliance. + +- **Modular CFG Design** + Split CFG data structures and builder logic into separate modules (`cfg_model.py` and + `cfg.py`) for improved clarity and extensibility. + +- **Template Extraction** + Extracted HTML templates into a dedicated `templates.py` module. + +- Added a `py.typed` marker for downstream type checkers. +- Added `__slots__` to performance-critical classes to reduce per-object memory overhead. + +--- + +## CLI & User Experience + +- Added a sequential execution fallback when process pools are unavailable (for example, in + restricted or sandboxed environments). +- Emit clear, user-visible warnings when cache validation fails instead of silently ignoring + corrupted state. + +--- + +## Testing & Quality + +- Expanded unit and integration test coverage across the CLI, CFG construction, cache + handling, scanner, and HTML reporting paths. +- Achieved and enforced 98%+ line coverage, with coverage configuration added to + `pyproject.toml`. + +--- + +## Fixed + +- **CFG Exception Handling** + Fixed incorrect control-flow linking for `try`/`except` blocks. + +- **Pattern Matching Support** + Added missing structural handling for `match`/`case` statements in the CFG. + +- **Block Detection Scaling** + Made `MIN_LINE_DISTANCE` dynamic based on block size to improve clone detection accuracy + across differently sized functions. + +--- + +It is recommended for all users, especially those running the tool in CI/CD or +security-sensitive environments. + ## [1.2.0] - 2026-02-02 ### BREAKING CHANGES diff --git a/codeclone.baseline.json b/codeclone.baseline.json deleted file mode 100644 index a4ac58f..0000000 --- a/codeclone.baseline.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "functions": [ - "7d573fa56fb11050f1642f18ca4bb3225e11e194|0-19" - ], - "blocks": [] -} \ No newline at end of file diff --git a/codeclone/__init__.py b/codeclone/__init__.py index 901a59b..4cd6ae8 100644 --- a/codeclone/__init__.py +++ b/codeclone/__init__.py @@ -6,7 +6,7 @@ Licensed under the MIT License. """ -from importlib.metadata import version, PackageNotFoundError +from importlib.metadata import PackageNotFoundError, version try: __version__ = version("codeclone") diff --git a/codeclone/baseline.py b/codeclone/baseline.py index ecdadc4..4ccb0af 100644 --- a/codeclone/baseline.py +++ b/codeclone/baseline.py @@ -9,10 +9,13 @@ from __future__ import annotations import json +from collections.abc import Mapping from pathlib import Path class Baseline: + __slots__ = ("blocks", "functions", "path") + def __init__(self, path: str | Path): self.path = Path(path) self.functions: set[str] = set() @@ -45,14 +48,18 @@ def save(self) -> None: @staticmethod def from_groups( - func_groups: dict, block_groups: dict, path: str | Path = "" - ) -> "Baseline": + func_groups: Mapping[str, object], + block_groups: Mapping[str, object], + path: str | Path = "", + ) -> Baseline: bl = Baseline(path) bl.functions = set(func_groups.keys()) bl.blocks = set(block_groups.keys()) return bl - def diff(self, func_groups: dict, block_groups: dict) -> tuple[set, set]: + def diff( + self, func_groups: Mapping[str, object], block_groups: Mapping[str, object] + ) -> tuple[set[str], set[str]]: new_funcs = set(func_groups.keys()) - self.functions new_blocks = set(block_groups.keys()) - self.blocks return new_funcs, new_blocks diff --git a/codeclone/blockhash.py b/codeclone/blockhash.py index c304acf..bd213ee 100644 --- a/codeclone/blockhash.py +++ b/codeclone/blockhash.py @@ -11,7 +11,7 @@ import ast import hashlib -from .normalize import NormalizationConfig, AstNormalizer +from .normalize import AstNormalizer, NormalizationConfig def stmt_hash(stmt: ast.stmt, cfg: NormalizationConfig) -> str: diff --git a/codeclone/blocks.py b/codeclone/blocks.py index cfdc6db..551d243 100644 --- a/codeclone/blocks.py +++ b/codeclone/blocks.py @@ -15,7 +15,7 @@ from .normalize import NormalizationConfig -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class BlockUnit: block_hash: str filepath: str @@ -42,7 +42,8 @@ def extract_blocks( blocks: list[BlockUnit] = [] last_start: int | None = None - MIN_LINE_DISTANCE = 5 # suppress overlapping windows + # Allow some overlap (50%), but at least 3 lines apart + min_line_distance = max(block_size // 2, 3) for i in range(len(stmt_hashes) - block_size + 1): start = getattr(body[i], "lineno", None) @@ -50,7 +51,7 @@ def extract_blocks( if not start or not end: continue - if last_start is not None and start - last_start < MIN_LINE_DISTANCE: + if last_start is not None and start - last_start < min_line_distance: continue bh = "|".join(stmt_hashes[i : i + block_size]) diff --git a/codeclone/cache.py b/codeclone/cache.py index 410f464..f652d17 100644 --- a/codeclone/cache.py +++ b/codeclone/cache.py @@ -8,47 +8,178 @@ from __future__ import annotations +import hashlib +import hmac import json import os +import secrets +from collections.abc import Mapping from dataclasses import asdict from pathlib import Path -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, TypedDict, cast + +if TYPE_CHECKING: + from .blocks import BlockUnit + from .extractor import Unit + +from .errors import CacheError + + +class FileStat(TypedDict): + mtime_ns: int + size: int + + +class UnitDict(TypedDict): + qualname: str + filepath: str + start_line: int + end_line: int + loc: int + stmt_count: int + fingerprint: str + loc_bucket: str + + +class BlockDict(TypedDict): + block_hash: str + filepath: str + qualname: str + start_line: int + end_line: int + size: int + + +class CacheEntry(TypedDict): + stat: FileStat + units: list[UnitDict] + blocks: list[BlockDict] + + +class CacheData(TypedDict): + version: str + files: dict[str, CacheEntry] class Cache: + __slots__ = ("data", "load_warning", "path", "secret") + CACHE_VERSION = "1.0" + def __init__(self, path: str | Path): self.path = Path(path) - self.data: dict[str, Any] = {"files": {}} + self.data: CacheData = {"version": self.CACHE_VERSION, "files": {}} + self.secret = self._load_secret() + self.load_warning: str | None = None - def load(self) -> None: - if self.path.exists(): + def _load_secret(self) -> bytes: + """Load or create cache signing secret.""" + # Store secret in the same directory as the cache file, named .cache_secret + # If cache is at ~/.cache/codeclone/cache.json, secret is + # ~/.cache/codeclone/.cache_secret + secret_path = self.path.parent / ".cache_secret" + if secret_path.exists(): + return secret_path.read_bytes() + else: + secret = secrets.token_bytes(32) try: - self.data = json.loads(self.path.read_text("utf-8")) - except json.JSONDecodeError: - # If cache is corrupted, start fresh - self.data = {"files": {}} + self.path.parent.mkdir(parents=True, exist_ok=True) + secret_path.write_bytes(secret) + # Set restrictive permissions on secret file (Unix only) + if os.name == "posix": + secret_path.chmod(0o600) + except OSError: + pass + return secret + + def _sign_data(self, data: Mapping[str, Any]) -> str: + """Create HMAC signature of cache data.""" + # Sort keys for deterministic JSON serialization + data_str = json.dumps(data, sort_keys=True) + return hmac.new(self.secret, data_str.encode(), hashlib.sha256).hexdigest() + + def load(self) -> None: + if not self.path.exists(): + return + + try: + raw = json.loads(self.path.read_text("utf-8")) + stored_sig = raw.get("_signature") + + # Extract data without signature for verification + data = {k: v for k, v in raw.items() if k != "_signature"} + + # Verify signature + expected_sig = self._sign_data(data) + if stored_sig != expected_sig: + self.load_warning = "Cache signature mismatch; ignoring cache." + self.data = {"version": self.CACHE_VERSION, "files": {}} + return + + if data.get("version") != self.CACHE_VERSION: + self.load_warning = ( + "Cache version mismatch " + f"(found {data.get('version')}); ignoring cache." + ) + self.data = {"version": self.CACHE_VERSION, "files": {}} + return + + # Basic structure check + if not isinstance(data.get("files"), dict): + self.load_warning = "Cache format invalid; ignoring cache." + self.data = {"version": self.CACHE_VERSION, "files": {}} + return + + self.data = cast(CacheData, data) + self.load_warning = None + + except (json.JSONDecodeError, ValueError): + self.load_warning = "Cache corrupted; ignoring cache." + self.data = {"version": self.CACHE_VERSION, "files": {}} def save(self) -> None: - self.path.parent.mkdir(parents=True, exist_ok=True) - self.path.write_text( - json.dumps(self.data, ensure_ascii=False, indent=2), - "utf-8", - ) + try: + self.path.parent.mkdir(parents=True, exist_ok=True) + + # Add signature + data_with_sig = {**self.data, "_signature": self._sign_data(self.data)} + + self.path.write_text( + json.dumps(data_with_sig, ensure_ascii=False, indent=2), + "utf-8", + ) + except OSError as e: + raise CacheError(f"Failed to save cache: {e}") from e + + def get_file_entry(self, filepath: str) -> CacheEntry | None: + entry = self.data["files"].get(filepath) + + if entry is None: + return None + + if not isinstance(entry, dict): + return None + + required = {"stat", "units", "blocks"} + if not required.issubset(entry.keys()): + return None - def get_file_entry(self, filepath: str) -> Optional[dict[str, Any]]: - return self.data.get("files", {}).get(filepath) + return entry def put_file_entry( - self, filepath: str, stat_sig: dict[str, Any], units: list, blocks: list + self, + filepath: str, + stat_sig: FileStat, + units: list[Unit], + blocks: list[BlockUnit], ) -> None: - self.data.setdefault("files", {})[filepath] = { + self.data["files"][filepath] = { "stat": stat_sig, - "units": [asdict(u) for u in units], - "blocks": [asdict(b) for b in blocks], + "units": cast(list[UnitDict], cast(object, [asdict(u) for u in units])), + "blocks": cast(list[BlockDict], cast(object, [asdict(b) for b in blocks])), } -def file_stat_signature(path: str) -> dict: +def file_stat_signature(path: str) -> FileStat: st = os.stat(path) return { "mtime_ns": st.st_mtime_ns, diff --git a/codeclone/cfg.py b/codeclone/cfg.py index 167f5f0..9235a7f 100644 --- a/codeclone/cfg.py +++ b/codeclone/cfg.py @@ -9,48 +9,21 @@ from __future__ import annotations import ast -from dataclasses import dataclass, field -from typing import Iterable +from collections.abc import Iterable +from typing import Protocol, cast +from .cfg_model import CFG, Block -# ========================= -# Core CFG structures -# ========================= - - -@dataclass(eq=False) -class Block: - id: int - statements: list[ast.stmt] = field(default_factory=list) - successors: set["Block"] = field(default_factory=set) - is_terminated: bool = False - - def add_successor(self, block: Block) -> None: - self.successors.add(block) - - def __hash__(self) -> int: - return hash(self.id) +__all__ = ["CFG", "CFGBuilder"] - def __eq__(self, other: object) -> bool: - return isinstance(other, Block) and self.id == other.id +TryStar = getattr(ast, "TryStar", ast.Try) -@dataclass -class CFG: - qualname: str - blocks: list[Block] = field(default_factory=list) - - entry: Block = field(init=False) - exit: Block = field(init=False) - - def __post_init__(self) -> None: - self.entry = self.create_block() - self.exit = self.create_block() - - def create_block(self) -> Block: - block = Block(id=len(self.blocks)) - self.blocks.append(block) - return block +class _TryLike(Protocol): + body: list[ast.stmt] + handlers: list[ast.ExceptHandler] + orelse: list[ast.stmt] + finalbody: list[ast.stmt] # ========================= @@ -59,6 +32,8 @@ def create_block(self) -> Block: class CFGBuilder: + __slots__ = ("cfg", "current") + def __init__(self) -> None: self.cfg: CFG self.current: Block @@ -110,8 +85,10 @@ def _visit(self, stmt: ast.stmt) -> None: case ast.AsyncFor(): self._visit_for(stmt) # Structure is identical to For - case ast.Try() | ast.TryStar(): - self._visit_try(stmt) + case ast.Try(): + self._visit_try(cast(_TryLike, stmt)) + case _ if TryStar is not None and isinstance(stmt, TryStar): + self._visit_try(cast(_TryLike, stmt)) case ast.With() | ast.AsyncWith(): self._visit_with(stmt) @@ -185,7 +162,8 @@ def _visit_for(self, stmt: ast.For | ast.AsyncFor) -> None: self.current = after_block def _visit_with(self, stmt: ast.With | ast.AsyncWith) -> None: - # Treat WITH as linear flow (enter -> body -> exit), but preserve block structure + # Treat WITH as linear flow (enter -> body -> exit), but preserve + # block structure # We record the context manager expression in the current block # Then we enter a new block for the body (to separate it structurally) # Then we enter a new block for 'after' (exit) @@ -210,126 +188,73 @@ def _visit_with(self, stmt: ast.With | ast.AsyncWith) -> None: self.current = after_block - def _visit_try(self, stmt: ast.Try | ast.TryStar) -> None: - # Simplified Try CFG: - # Try Body -> [Handlers...] -> Finally/After - # Try Body -> Else -> Finally/After - - try_block = self.cfg.create_block() - self.current.add_successor(try_block) - - # We don't know WHERE in the try block exception happens, so we assume - # any point in try block *could* jump to handlers. - # But for structural hashing, we just process the body. - # Ideally, we should link the try_block (or its end) to handlers? - # A simple approximation: - # 1. Process body. - # 2. Link entry (or end of body) to handlers? - # Let's do: Entry -> BodyBlock. - # Entry -> HandlerBlocks (to represent potential jump). - - # Actually, let's keep it linear but branched. - # Current -> TryBody - # Current -> Handlers (Abstractly representing the jump) + def _visit_try(self, stmt: _TryLike) -> None: + try_entry = self.cfg.create_block() + self.current.add_successor(try_entry) + self.current = try_entry handlers_blocks = [self.cfg.create_block() for _ in stmt.handlers] else_block = self.cfg.create_block() if stmt.orelse else None - final_block = self.cfg.create_block() # This is finally or after + final_block = self.cfg.create_block() - # Link current to TryBody - self.current = try_block - self._visit_statements(stmt.body) + # Process each statement in try body + # Link each to exception handlers + for stmt_node in stmt.body: + if self.current.is_terminated: + break + + # Current statement could raise exception + for h_block in handlers_blocks: + self.current.add_successor(h_block) + + self._visit(stmt_node) - # If try body finishes successfully: + # Normal exit from try if not self.current.is_terminated: if else_block: self.current.add_successor(else_block) else: self.current.add_successor(final_block) - # Handle Else - if else_block: - self.current = else_block - self._visit_statements(stmt.orelse) - if not self.current.is_terminated: - self.current.add_successor(final_block) - - # Handle Handlers - # We assume control flow *could* jump from start of Try to any handler - # (Technically from inside try, but we model structural containment) - # To make fingerprints stable, we just need to ensure handlers are visited - # and linked. - - # We link the *original* predecessor (before try) or the try_block start to handlers? - # Let's link the `try_block` (as a container concept) to handlers. - # But `try_block` was mutated by `_visit_statements`. - # Let's use the `try_block` (start of try) to link to handlers. - for h_block in handlers_blocks: - try_block.add_successor(h_block) - - for handler, h_block in zip(stmt.handlers, handlers_blocks): + # Process handlers + for handler, h_block in zip(stmt.handlers, handlers_blocks, strict=True): self.current = h_block - # Record exception type if handler.type: self.current.statements.append(ast.Expr(value=handler.type)) + self._visit_statements(handler.body) if not self.current.is_terminated: self.current.add_successor(final_block) - # Finally logic: - # If there is a finally block, `final_block` IS the finally block. - # We visit it. Then we create a new `after_finally` block? - # Or `final_block` is the start of finally. + # Process else + if else_block: + self.current = else_block + self._visit_statements(stmt.orelse) + if not self.current.is_terminated: + self.current.add_successor(final_block) + # Process finally + self.current = final_block if stmt.finalbody: - self.current = final_block self._visit_statements(stmt.finalbody) - # And then continue to next code? - # Yes, finally flows to next statement. - # Unless terminated. - - # If no finally, `final_block` is just the merge point (after). - self.current = final_block def _visit_match(self, stmt: ast.Match) -> None: - # Match subject -> Cases -> After - self.current.statements.append(ast.Expr(value=stmt.subject)) - after_block = self.cfg.create_block() - - for case_ in stmt.cases: - case_block = self.cfg.create_block() - self.current.add_successor(case_block) - - # Save current context to restore for next case branching? - # No, 'current' is the match subject block. It branches to ALL cases. - - # Visit Case - # We must set self.current to case_block for visiting body - # But we lose reference to 'match subject block' to link next case! - # So we need a variable `subject_block`. - pass - - # Re-implementing loop correctly subject_block = self.current + after_block = self.cfg.create_block() for case_ in stmt.cases: case_block = self.cfg.create_block() subject_block.add_successor(case_block) self.current = case_block - # We could record the pattern here? - # patterns are complex AST nodes. For now, let's skip pattern structure hash - # and just hash the body. Or dump pattern as statement? - # Pattern is not a statement. - # Let's ignore pattern details for V1, or try to normalize it. - # If we ignore pattern, then `case []:` and `case {}:` look same. - # Ideally: `self.current.statements.append(case_.pattern)` but pattern is not stmt. - # We can wrap in Expr? `ast.Expr(value=case_.pattern)`? - # Pattern is NOT an Expr subclass in 3.10. It's `ast.pattern`. - # So we cannot append it to `statements` list which expects `ast.stmt`. - # We will ignore pattern structure for now (it's structural flow we care about). + + # Record pattern structure + pattern_repr = ast.dump(case_.pattern, annotate_fields=False) + self.current.statements.append( + ast.Expr(value=ast.Constant(value=f"PATTERN:{pattern_repr}")) + ) self._visit_statements(case_.body) if not self.current.is_terminated: diff --git a/codeclone/cli.py b/codeclone/cli.py index f77bc61..9a787ec 100644 --- a/codeclone/cli.py +++ b/codeclone/cli.py @@ -1,36 +1,32 @@ -""" -CodeClone — AST and CFG-based code clone detector for Python -focused on architectural duplication. - -Copyright (c) 2026 Den Rozhnovskiy -Licensed under the MIT License. -""" - from __future__ import annotations import argparse +import os import sys from concurrent.futures import ProcessPoolExecutor, as_completed +from dataclasses import asdict, dataclass from pathlib import Path +from typing import Any, cast from rich.console import Console from rich.panel import Panel from rich.progress import ( + BarColumn, Progress, SpinnerColumn, TextColumn, - BarColumn, TimeElapsedColumn, ) from rich.table import Table from rich.theme import Theme from .baseline import Baseline -from .cache import Cache, file_stat_signature +from .cache import Cache, CacheEntry, FileStat, file_stat_signature +from .errors import CacheError from .extractor import extract_units_from_source from .html_report import build_html_report from .normalize import NormalizationConfig -from .report import build_groups, build_block_groups, to_json, to_text +from .report import build_block_groups, build_groups, to_json_report, to_text from .scanner import iter_py_files, module_name_from_path # Custom theme for Rich @@ -45,6 +41,21 @@ ) console = Console(theme=custom_theme, width=200) +MAX_FILE_SIZE = 10 * 1024 * 1024 # 10MB +BATCH_SIZE = 100 + + +@dataclass(slots=True) +class ProcessingResult: + """Result of processing a single file.""" + + filepath: str + success: bool + error: str | None = None + units: list[Any] | None = None + blocks: list[Any] | None = None + stat: FileStat | None = None + def expand_path(p: str) -> Path: return Path(p).expanduser().resolve() @@ -56,28 +67,73 @@ def process_file( cfg: NormalizationConfig, min_loc: int, min_stmt: int, -) -> tuple[str, dict, list, list] | None: +) -> ProcessingResult: + """ + Process a single Python file with comprehensive error handling. + + Args: + filepath: Absolute path to the file + root: Root directory of the scan + cfg: Normalization configuration + min_loc: Minimum lines of code to consider a function + min_stmt: Minimum statements to consider a function + + Returns: + ProcessingResult object indicating success/failure and containing + extracted units/blocks if successful. + """ + try: - source = Path(filepath).read_text("utf-8") - except UnicodeDecodeError: - return None - - stat = file_stat_signature(filepath) - module_name = module_name_from_path(root, filepath) - - units, blocks = extract_units_from_source( - source=source, - filepath=filepath, - module_name=module_name, - cfg=cfg, - min_loc=min_loc, - min_stmt=min_stmt, - ) + # Check file size + try: + st_size = os.path.getsize(filepath) + if st_size > MAX_FILE_SIZE: + return ProcessingResult( + filepath=filepath, + success=False, + error=f"File too large: {st_size} bytes (max {MAX_FILE_SIZE})", + ) + except OSError as e: + return ProcessingResult( + filepath=filepath, success=False, error=f"Cannot stat file: {e}" + ) - return filepath, stat, units, blocks + try: + source = Path(filepath).read_text("utf-8") + except UnicodeDecodeError as e: + return ProcessingResult( + filepath=filepath, success=False, error=f"Encoding error: {e}" + ) + + stat = file_stat_signature(filepath) + module_name = module_name_from_path(root, filepath) + + units, blocks = extract_units_from_source( + source=source, + filepath=filepath, + module_name=module_name, + cfg=cfg, + min_loc=min_loc, + min_stmt=min_stmt, + ) + return ProcessingResult( + filepath=filepath, + success=True, + units=units, + blocks=blocks, + stat=stat, + ) -def print_banner(): + except Exception as e: + return ProcessingResult( + filepath=filepath, + success=False, + error=f"Unexpected error: {type(e).__name__}: {e}", + ) + + +def print_banner() -> None: console.print( Panel.fit( "[bold white]CodeClone[/bold white] [dim]v1.2.1[/dim]\n" @@ -185,9 +241,13 @@ def main() -> None: print_banner() - root_path = Path(args.root).resolve() - if not root_path.exists(): - console.print(f"[error]Root path does not exist: {root_path}[/error]") + try: + root_path = Path(args.root).resolve() + if not root_path.exists(): + console.print(f"[error]Root path does not exist: {root_path}[/error]") + sys.exit(1) + except Exception as e: + console.print(f"[error]Invalid root path: {e}[/error]") sys.exit(1) console.print(f"[info]Scanning root:[/info] {root_path}") @@ -197,101 +257,215 @@ def main() -> None: cache_path = Path(args.cache_dir).expanduser() cache = Cache(cache_path) cache.load() + if cache.load_warning: + console.print(f"[warning]{cache.load_warning}[/warning]") - all_units: list[dict] = [] - all_blocks: list[dict] = [] + all_units: list[dict[str, Any]] = [] + all_blocks: list[dict[str, Any]] = [] changed_files_count = 0 files_to_process: list[str] = [] + def _get_cached_entry( + fp: str, + ) -> tuple[FileStat | None, CacheEntry | None, str | None]: + try: + stat = file_stat_signature(fp) + except OSError as e: + return None, None, f"[warning]Skipping file {fp}: {e}[/warning]" + cached = cache.get_file_entry(fp) + return stat, cached, None + + def _safe_process_file(fp: str) -> ProcessingResult | None: + try: + return process_file( + fp, + str(root_path), + cfg, + args.min_loc, + args.min_stmt, + ) + except Exception as e: + console.print(f"[warning]Worker failed: {e}[/warning]") + return None + + def _safe_future_result(future: Any) -> tuple[ProcessingResult | None, str | None]: + try: + return future.result(), None + except Exception as e: + return None, str(e) + # Discovery phase with console.status("[bold green]Discovering Python files...", spinner="dots"): - for fp in iter_py_files(str(root_path)): - stat = file_stat_signature(fp) - cached = cache.get_file_entry(fp) - if cached and cached.get("stat") == stat: - all_units.extend(cached.get("units", [])) - all_blocks.extend(cached.get("blocks", [])) - else: - files_to_process.append(fp) + try: + for fp in iter_py_files(str(root_path)): + stat, cached, warn = _get_cached_entry(fp) + if warn: + console.print(warn) + continue + if cached and cached.get("stat") == stat: + all_units.extend( + cast( + list[dict[str, Any]], + cast(object, cached.get("units", [])), + ) + ) + all_blocks.extend( + cast( + list[dict[str, Any]], + cast(object, cached.get("blocks", [])), + ) + ) + else: + files_to_process.append(fp) + except Exception as e: + console.print(f"[error]Scan failed: {e}[/error]") + sys.exit(1) total_files = len(files_to_process) + failed_files = [] # Processing phase if total_files > 0: - if args.no_progress: - console.print(f"[info]Processing {total_files} changed files...[/info]") - with ProcessPoolExecutor(max_workers=args.processes) as executor: - futures = [ - executor.submit( - process_file, - fp, - str(root_path), - cfg, - args.min_loc, - args.min_stmt, + + def handle_result(result: ProcessingResult) -> None: + nonlocal changed_files_count + if result.success and result.stat: + cache.put_file_entry( + result.filepath, + result.stat, + result.units or [], + result.blocks or [], + ) + changed_files_count += 1 + if result.units: + all_units.extend([asdict(u) for u in result.units]) + if result.blocks: + all_blocks.extend([asdict(b) for b in result.blocks]) + else: + failed_files.append(f"{result.filepath}: {result.error}") + + def process_sequential(with_progress: bool) -> None: + if with_progress: + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeElapsedColumn(), + console=console, + ) as progress: + task = progress.add_task( + f"Analyzing {total_files} files...", total=total_files ) - for fp in files_to_process - ] - for future in as_completed(futures): - try: - result = future.result() - except Exception as e: - console.print(f"[warning]Failed to process file: {e}[/warning]") - continue - - if result: - fp, stat, units, blocks = result - cache.put_file_entry(fp, stat, units, blocks) - changed_files_count += 1 - all_units.extend([u.__dict__ for u in units]) - all_blocks.extend([b.__dict__ for b in blocks]) - else: - with Progress( - SpinnerColumn(), - TextColumn("[progress.description]{task.description}"), - BarColumn(), - TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), - TimeElapsedColumn(), - console=console, - ) as progress: - task = progress.add_task( - f"Analyzing {total_files} files...", total=total_files + for fp in files_to_process: + result = _safe_process_file(fp) + if result is not None: + handle_result(result) + progress.advance(task) + else: + console.print( + f"[info]Processing {total_files} changed files...[/info]" ) - with ProcessPoolExecutor(max_workers=args.processes) as executor: - futures = [ - executor.submit( - process_file, - fp, - str(root_path), - cfg, - args.min_loc, - args.min_stmt, + for fp in files_to_process: + result = _safe_process_file(fp) + if result is not None: + handle_result(result) + + try: + with ProcessPoolExecutor(max_workers=args.processes) as executor: + if args.no_progress: + console.print( + f"[info]Processing {total_files} changed files...[/info]" + ) + + # Process in batches to manage memory + for i in range(0, total_files, BATCH_SIZE): + batch = files_to_process[i : i + BATCH_SIZE] + futures = [ + executor.submit( + process_file, + fp, + str(root_path), + cfg, + args.min_loc, + args.min_stmt, + ) + for fp in batch + ] + + for future in as_completed(futures): + result, err = _safe_future_result(future) + if result is not None: + handle_result(result) + elif err is not None: + console.print( + "[warning]Failed to process batch item: " + f"{err}[/warning]" + ) + + else: + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), + TimeElapsedColumn(), + console=console, + ) as progress: + task = progress.add_task( + f"Analyzing {total_files} files...", total=total_files ) - for fp in files_to_process - ] - for future in as_completed(futures): - try: - result = future.result() - except Exception: - # Log error but keep progress bar moving? - # console.print might break progress bar layout, better to rely on rich logging or just skip - # console.print(f"[warning]Failed to process file: {e}[/warning]") - continue - finally: - progress.advance(task) - - if result: - fp, stat, units, blocks = result - cache.put_file_entry(fp, stat, units, blocks) - changed_files_count += 1 - all_units.extend([u.__dict__ for u in units]) - all_blocks.extend([b.__dict__ for b in blocks]) + + # Process in batches + for i in range(0, total_files, BATCH_SIZE): + batch = files_to_process[i : i + BATCH_SIZE] + futures = [ + executor.submit( + process_file, + fp, + str(root_path), + cfg, + args.min_loc, + args.min_stmt, + ) + for fp in batch + ] + + for future in as_completed(futures): + result, err = _safe_future_result(future) + if result is not None: + handle_result(result) + elif err is not None: + # Should rarely happen due to try/except + # in process_file. + console.print( + f"[warning]Worker failed: {err}[/warning]" + ) + progress.advance(task) + except (OSError, RuntimeError, PermissionError) as e: + console.print( + "[warning]Parallel processing unavailable, " + f"falling back to sequential: {e}[/warning]" + ) + process_sequential(with_progress=not args.no_progress) + + if failed_files: + console.print( + f"\n[warning]⚠ {len(failed_files)} files failed to process:[/warning]" + ) + for failure in failed_files[:10]: + console.print(f" • {failure}") + if len(failed_files) > 10: + console.print(f" ... and {len(failed_files) - 10} more") # Analysis phase with console.status("[bold green]Grouping clones...", spinner="dots"): func_groups = build_groups(all_units) block_groups = build_block_groups(all_blocks) - cache.save() + try: + cache.save() + except CacheError as e: + console.print(f"[warning]Failed to save cache: {e}[/warning]") # Reporting func_clones_count = len(func_groups) @@ -300,7 +474,8 @@ def main() -> None: # Baseline Logic baseline_path = Path(args.baseline).expanduser().resolve() - # If user didn't specify path, and default logic applies, baseline_path is now ./codeclone_baseline.json + # If user didn't specify path and default logic applies, baseline_path + # is now ./codeclone_baseline.json baseline = Baseline(baseline_path) baseline_exists = baseline_path.exists() @@ -310,7 +485,9 @@ def main() -> None: else: if not args.update_baseline: console.print( - f"[warning]Baseline file not found at: [bold]{baseline_path}[/bold][/warning]\n" + "[warning]Baseline file not found at: [bold]" + f"{baseline_path}" + "[/bold][/warning]\n" "[dim]Comparing against an empty baseline. " "Use --update-baseline to create it.[/dim]" ) @@ -365,7 +542,7 @@ def main() -> None: out = Path(args.json_out).expanduser().resolve() out.parent.mkdir(parents=True, exist_ok=True) out.write_text( - to_json({"functions": func_groups, "blocks": block_groups}), + to_json_report(func_groups, block_groups), "utf-8", ) console.print(f"[info]JSON report saved:[/info] {out}") @@ -392,8 +569,9 @@ def main() -> None: sys.exit(3) if 0 <= args.fail_threshold < (func_clones_count + block_clones_count): + total = func_clones_count + block_clones_count console.print( - f"\n[error]❌ FAILED: Total clones ({func_clones_count + block_clones_count}) " + f"\n[error]❌ FAILED: Total clones ({total}) " f"exceed threshold ({args.fail_threshold})![/error]" ) sys.exit(2) diff --git a/codeclone/extractor.py b/codeclone/extractor.py index 031f652..02f9730 100644 --- a/codeclone/extractor.py +++ b/codeclone/extractor.py @@ -9,21 +9,24 @@ from __future__ import annotations import ast +import os +import signal +from collections.abc import Iterator +from contextlib import contextmanager from dataclasses import dataclass -from typing import Sequence -from .blocks import extract_blocks, BlockUnit +from .blocks import BlockUnit, extract_blocks from .cfg import CFGBuilder -from .fingerprint import sha1, bucket_loc +from .errors import ParseError +from .fingerprint import bucket_loc, sha1 from .normalize import NormalizationConfig, normalized_ast_dump_from_list - # ========================= # Data structures # ========================= -@dataclass(frozen=True) +@dataclass(frozen=True, slots=True) class Unit: qualname: str filepath: str @@ -39,6 +42,67 @@ class Unit: # Helpers # ========================= +PARSE_TIMEOUT_SECONDS = 5 + + +class _ParseTimeoutError(Exception): + pass + + +@contextmanager +def _parse_limits(timeout_s: int) -> Iterator[None]: + if os.name != "posix" or timeout_s <= 0: + yield + return + + old_handler = signal.getsignal(signal.SIGALRM) + + def _timeout_handler(_signum: int, _frame: object) -> None: + raise _ParseTimeoutError("AST parsing timeout") + + old_limits: tuple[int, int] | None = None + try: + signal.signal(signal.SIGALRM, _timeout_handler) + signal.setitimer(signal.ITIMER_REAL, timeout_s) + + try: + import resource + + old_limits = resource.getrlimit(resource.RLIMIT_CPU) + soft, hard = old_limits + new_soft = ( + min(timeout_s, soft) if soft != resource.RLIM_INFINITY else timeout_s + ) + new_hard = ( + min(timeout_s + 1, hard) + if hard != resource.RLIM_INFINITY + else timeout_s + 1 + ) + resource.setrlimit(resource.RLIMIT_CPU, (new_soft, new_hard)) + except Exception: + # If resource is unavailable or cannot be set, rely on alarm only. + pass + + yield + finally: + signal.setitimer(signal.ITIMER_REAL, 0) + signal.signal(signal.SIGALRM, old_handler) + if old_limits is not None: + try: + import resource + + resource.setrlimit(resource.RLIMIT_CPU, old_limits) + except Exception: + pass + + +def _parse_with_limits(source: str, timeout_s: int) -> ast.AST: + try: + with _parse_limits(timeout_s): + return ast.parse(source) + except _ParseTimeoutError as e: + raise ParseError(str(e)) from e + def _stmt_count(node: ast.AST) -> int: body = getattr(node, "body", None) @@ -46,6 +110,8 @@ def _stmt_count(node: ast.AST) -> int: class _QualnameBuilder(ast.NodeVisitor): + __slots__ = ("stack", "units") + def __init__(self) -> None: self.stack: list[str] = [] self.units: list[tuple[str, ast.FunctionDef | ast.AsyncFunctionDef]] = [] @@ -56,11 +122,11 @@ def visit_ClassDef(self, node: ast.ClassDef) -> None: self.stack.pop() def visit_FunctionDef(self, node: ast.FunctionDef) -> None: - name = ".".join(self.stack + [node.name]) if self.stack else node.name + name = ".".join([*self.stack, node.name]) if self.stack else node.name self.units.append((name, node)) def visit_AsyncFunctionDef(self, node: ast.AsyncFunctionDef) -> None: - name = ".".join(self.stack + [node.name]) if self.stack else node.name + name = ".".join([*self.stack, node.name]) if self.stack else node.name self.units.append((name, node)) @@ -75,28 +141,39 @@ def get_cfg_fingerprint( qualname: str, ) -> str: """ - Build CFG, normalize it into a canonical form, and hash it. + Generate a structural fingerprint for a function using CFG analysis. + + The fingerprint is computed by: + 1. Building a Control Flow Graph (CFG) from the function + 2. Normalizing each CFG block's statements (variable names, constants, etc.) + 3. Creating a canonical representation of the CFG structure + 4. Hashing the representation with SHA-1 + + Functions with identical control flow and normalized statements will + produce the same fingerprint, even if they differ in variable names, + constants, or type annotations. + + Args: + node: Function AST node to fingerprint + cfg: Normalization configuration (what to ignore) + qualname: Qualified name for logging/debugging + + Returns: + 40-character hex SHA-1 hash of the normalized CFG """ builder = CFGBuilder() graph = builder.build(qualname, node) + # Use generator to avoid building large list of strings parts: list[str] = [] - - # Stable order for deterministic hash for block in sorted(graph.blocks, key=lambda b: b.id): - # NOTE: normalized_ast_dump_from_list must accept Sequence[ast.AST] (covariant), - # but even if it still accepts list[ast.AST], passing list[ast.stmt] will fail - # due to invariance. We pass as Sequence[ast.AST] via a typed view. - stmts_as_ast: Sequence[ast.AST] = block.statements - normalized_stmts = normalized_ast_dump_from_list(stmts_as_ast, cfg) - - successor_ids = sorted(succ.id for succ in block.successors) - + succ_ids = ",".join( + str(s.id) for s in sorted(block.successors, key=lambda s: s.id) + ) parts.append( - f"BLOCK[{block.id}]:{normalized_stmts}" - f"|SUCCESSORS:{','.join(map(str, successor_ids))}" + f"BLOCK[{block.id}]:{normalized_ast_dump_from_list(block.statements, cfg)}" + f"|SUCCESSORS:{succ_ids}" ) - return sha1("|".join(parts)) @@ -114,9 +191,9 @@ def extract_units_from_source( min_stmt: int, ) -> tuple[list[Unit], list[BlockUnit]]: try: - tree = ast.parse(source) - except SyntaxError: - return [], [] + tree = _parse_with_limits(source, PARSE_TIMEOUT_SECONDS) + except SyntaxError as e: + raise ParseError(f"Failed to parse {filepath}: {e}") from e qb = _QualnameBuilder() qb.visit(tree) diff --git a/codeclone/html_report.py b/codeclone/html_report.py index 440882d..943ed36 100644 --- a/codeclone/html_report.py +++ b/codeclone/html_report.py @@ -9,32 +9,35 @@ from __future__ import annotations import html +import importlib import itertools +from collections.abc import Iterable from dataclasses import dataclass -from pathlib import Path -from string import Template -from typing import Any, Optional, Iterable +from functools import lru_cache +from typing import Any, NamedTuple, cast from codeclone import __version__ +from codeclone.errors import FileProcessingError +from .templates import REPORT_TEMPLATE -# ============================ +# ============================ # Pairwise -# ============================ +# ============================ def pairwise(iterable: Iterable[Any]) -> Iterable[tuple[Any, Any]]: a, b = itertools.tee(iterable) next(b, None) - return zip(a, b) + return zip(a, b, strict=False) -# ============================ +# ============================ # Code snippet infrastructure -# ============================ +# ============================ -@dataclass +@dataclass(slots=True) class _Snippet: filepath: str start_line: int @@ -43,28 +46,79 @@ class _Snippet: class _FileCache: - def __init__(self) -> None: - self._lines: dict[str, list[str]] = {} + __slots__ = ("_get_lines_impl", "maxsize") + + def __init__(self, maxsize: int = 128) -> None: + self.maxsize = maxsize + # Create a bound method with lru_cache + # We need to cache on the method to have instance-level caching if we wanted + # different caches per instance. But lru_cache on method actually caches + # on the function object (class level) if not careful, + # or we use a wrapper. + # However, for this script, we usually have one reporter. + # To be safe and cleaner, we can use a method that delegates to a cached + # function, OR just use lru_cache on a method (which requires 'self' to be + # hashable, which it is by default id). + # But 'self' changes if we create new instances. + # Let's use the audit's pattern: cache the implementation. + + self._get_lines_impl = lru_cache(maxsize=maxsize)(self._read_file_range) + + def _read_file_range( + self, filepath: str, start_line: int, end_line: int + ) -> tuple[str, ...]: + if start_line < 1: + start_line = 1 + if end_line < start_line: + return () + + try: + + def _read_with_errors(errors: str) -> tuple[str, ...]: + lines: list[str] = [] + with open(filepath, encoding="utf-8", errors=errors) as f: + for lineno, line in enumerate(f, start=1): + if lineno < start_line: + continue + if lineno > end_line: + break + lines.append(line.rstrip("\n")) + return tuple(lines) - def get_lines(self, filepath: str) -> list[str]: - if filepath not in self._lines: try: - text = Path(filepath).read_text("utf-8") + return _read_with_errors("strict") except UnicodeDecodeError: - text = Path(filepath).read_text("utf-8", errors="replace") - self._lines[filepath] = text.splitlines() - return self._lines[filepath] + return _read_with_errors("replace") + except OSError as e: + raise FileProcessingError(f"Cannot read {filepath}: {e}") from e + + def get_lines_range( + self, filepath: str, start_line: int, end_line: int + ) -> tuple[str, ...]: + return self._get_lines_impl(filepath, start_line, end_line) + class _CacheInfo(NamedTuple): + hits: int + misses: int + maxsize: int | None + currsize: int -def _try_pygments(code: str) -> Optional[str]: + def cache_info(self) -> _CacheInfo: + return cast(_FileCache._CacheInfo, self._get_lines_impl.cache_info()) + + +def _try_pygments(code: str) -> str | None: try: - from pygments import highlight - from pygments.formatters import HtmlFormatter - from pygments.lexers import PythonLexer + pygments = importlib.import_module("pygments") + formatters = importlib.import_module("pygments.formatters") + lexers = importlib.import_module("pygments.lexers") except Exception: return None - result = highlight(code, PythonLexer(), HtmlFormatter(nowrap=True)) + highlight = pygments.highlight + formatter_cls = formatters.HtmlFormatter + lexer_cls = lexers.PythonLexer + result = highlight(code, lexer_cls(), formatter_cls(nowrap=True)) return result if isinstance(result, str) else None @@ -74,21 +128,23 @@ def _pygments_css(style_name: str) -> str: If Pygments is not available or style missing, returns "". """ try: - from pygments.formatters import HtmlFormatter + formatters = importlib.import_module("pygments.formatters") except Exception: return "" try: - fmt = HtmlFormatter(style=style_name) + formatter_cls = formatters.HtmlFormatter + fmt = formatter_cls(style=style_name) except Exception: try: - fmt = HtmlFormatter() + fmt = formatter_cls() except Exception: return "" try: # `.codebox` scope: pygments will emit selectors like `.codebox .k { ... }` - return fmt.get_style_defs(".codebox") + css = fmt.get_style_defs(".codebox") + return css if isinstance(css, str) else "" except Exception: return "" @@ -104,11 +160,7 @@ def _prefix_css(css: str, prefix: str) -> str: if not stripped: out_lines.append(line) continue - if ( - stripped.startswith("/*") - or stripped.startswith("*") - or stripped.startswith("*/") - ): + if stripped.startswith(("/*", "*", "*/")): out_lines.append(line) continue # Selector lines usually end with `{ @@ -126,25 +178,24 @@ def _prefix_css(css: str, prefix: str) -> str: def _render_code_block( - *, - filepath: str, - start_line: int, - end_line: int, - file_cache: _FileCache, - context: int, - max_lines: int, + *, + filepath: str, + start_line: int, + end_line: int, + file_cache: _FileCache, + context: int, + max_lines: int, ) -> _Snippet: - lines = file_cache.get_lines(filepath) - s = max(1, start_line - context) - e = min(len(lines), end_line + context) + e = end_line + context if e - s + 1 > max_lines: e = s + max_lines - 1 + lines = file_cache.get_lines_range(filepath, s, e) + numbered: list[tuple[bool, str]] = [] - for lineno in range(s, e + 1): - line = lines[lineno - 1] + for lineno, line in enumerate(lines, start=s): hit = start_line <= lineno <= end_line numbered.append((hit, f"{lineno:>5} | {line.rstrip()}")) @@ -184,643 +235,13 @@ def _group_sort_key(items: list[dict[str, Any]]) -> tuple[int, int]: ) -REPORT_TEMPLATE = Template(r""" - - - - - -${title} - - - - - -
-
-
-

${title}

-
v${version}
-
-
- -
-
-
- -
-${empty_state_html} - -${func_section} -${block_section} - - -
- - - - -""") - - def build_html_report( - *, - func_groups: dict[str, list[dict[str, Any]]], - block_groups: dict[str, list[dict[str, Any]]], - title: str = "CodeClone Report", - context_lines: int = 3, - max_snippet_lines: int = 220, + *, + func_groups: dict[str, list[dict[str, Any]]], + block_groups: dict[str, list[dict[str, Any]]], + title: str = "CodeClone Report", + context_lines: int = 3, + max_snippet_lines: int = 220, ) -> str: file_cache = _FileCache() @@ -844,24 +265,74 @@ def build_html_report( # ============================ # Icons (Inline SVG) # ============================ - ICON_SEARCH = '' - ICON_X = '' - ICON_CHEV_DOWN = '' - # ICON_CHEV_RIGHT = '' - ICON_THEME = '' - ICON_CHECK = '' - ICON_PREV = '' - ICON_NEXT = '' + ICON_SEARCH = ( + '' + '' + '' + "" + ) + ICON_X = ( + '' + '' + '' + "" + ) + ICON_CHEV_DOWN = ( + '' + '' + "" + ) + # ICON_CHEV_RIGHT = ( + # '' + # '' + # "" + # ) + ICON_THEME = ( + '' + '' + "" + ) + ICON_CHECK = ( + '' + '' + "" + ) + ICON_PREV = ( + '' + '' + "" + ) + ICON_NEXT = ( + '' + '' + "" + ) # ---------------------------- # Section renderer # ---------------------------- def render_section( - section_id: str, - section_title: str, - groups: list[tuple[str, list[dict[str, Any]]]], - pill_cls: str, + section_id: str, + section_title: str, + groups: list[tuple[str, list[dict[str, Any]]]], + pill_cls: str, ) -> str: if not groups: return "" @@ -871,26 +342,43 @@ def render_section( f'
', '
', f"

{_escape(section_title)} " - f'{len(groups)} groups

', + f'' + f"{len(groups)} groups", f""" -