From 55b19301073df30583569094f59abe8517112f3e Mon Sep 17 00:00:00 2001 From: Hasnain Habib Sayed Date: Sat, 14 Mar 2026 20:02:14 +0600 Subject: [PATCH 1/3] Reimpliment the d3 scale cluster without any external dependencies --- package.json | 1 - .../__tests__/scale-cluster.test.js | 106 +++++++++++ .../sunburst/sunburst-container.jsx | 2 +- src/js/scale-cluster.js | 176 ++++++++++++++++++ yarn.lock | 12 -- 5 files changed, 283 insertions(+), 14 deletions(-) create mode 100644 src/components/__tests__/scale-cluster.test.js create mode 100644 src/js/scale-cluster.js diff --git a/package.json b/package.json index 83579a3..d6c702b 100644 --- a/package.json +++ b/package.json @@ -4,7 +4,6 @@ "main": "build/app.js", "dependencies": { "d3": "7.9.0", - "d3-scale-cluster": "^2.0.1", "lodash": "^4.17.23", "prop-types": "^15.8.1", "react": "18", diff --git a/src/components/__tests__/scale-cluster.test.js b/src/components/__tests__/scale-cluster.test.js new file mode 100644 index 0000000..532d24e --- /dev/null +++ b/src/components/__tests__/scale-cluster.test.js @@ -0,0 +1,106 @@ +import scaleCluster from '../../js/scale-cluster'; + +describe('scaleCluster', () => { + it('computes clusters correctly for well-separated groups', () => { + const data = [1, 2, 10, 11, 20]; + const scale = scaleCluster().domain(data).range([1, 2, 3]); + + const clusters = scale.clusters(); + expect(clusters.length).toBe(2); + expect(clusters).toEqual([10, 20]); + + expect(scale(1)).toBe(1); + expect(scale(2)).toBe(1); + expect(scale(10)).toBe(2); + expect(scale(11)).toBe(2); + expect(scale(20)).toBe(3); + }); + + it('handles the sunburst 5-range use case', () => { + const data = [5, 12, 15, 30, 32, 60, 61, 80, 95]; + const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]); + const clusters = scale.clusters(); + + expect(clusters.length).toBe(4); + clusters.forEach(c => expect(typeof c).toBe('number')); + + for (const val of data) { + const result = scale(val); + expect(result).toBeGreaterThanOrEqual(1); + expect(result).toBeLessThanOrEqual(5); + } + }); + + it('returns single breakpoint-free cluster when all values identical', () => { + const scale = scaleCluster().domain([7, 7, 7, 7]).range([1, 2, 3]); + expect(scale.clusters()).toEqual([]); + expect(scale(7)).toBe(1); + }); + + it('handles a single data point', () => { + const scale = scaleCluster().domain([42]).range([1, 2, 3]); + expect(scale.clusters()).toEqual([]); + expect(scale(42)).toBe(1); + }); + + it('reduces cluster count when fewer unique values than range buckets', () => { + const scale = scaleCluster().domain([1, 1, 50, 50]).range([1, 2, 3, 4, 5]); + const clusters = scale.clusters(); + + expect(clusters.length).toBe(1); + expect(clusters).toEqual([50]); + expect(scale(1)).toBe(1); + expect(scale(50)).toBe(2); + }); + + it('works with unsorted input', () => { + const scale = scaleCluster().domain([20, 1, 11, 2, 10]).range([1, 2, 3]); + expect(scale.clusters()).toEqual([10, 20]); + expect(scale(2)).toBe(1); + expect(scale(11)).toBe(2); + expect(scale(20)).toBe(3); + }); + + it('handles negative and mixed-sign values', () => { + const data = [-50, -48, 0, 1, 100, 99]; + const scale = scaleCluster().domain(data).range([1, 2, 3]); + const clusters = scale.clusters(); + + expect(clusters.length).toBe(2); + expect(scale(-50)).toBe(1); + expect(scale(-48)).toBe(1); + expect(scale(0)).toBe(2); + expect(scale(1)).toBe(2); + expect(scale(99)).toBe(3); + expect(scale(100)).toBe(3); + }); + + it('handles floating-point similarity percentages', () => { + const data = [0.5, 1.2, 1.3, 50.7, 51.1, 99.9]; + const scale = scaleCluster().domain(data).range([1, 2, 3]); + const clusters = scale.clusters(); + + expect(clusters.length).toBe(2); + for (const val of data) { + expect([1, 2, 3]).toContain(scale(val)); + } + }); + + it('preserves monotonicity: higher input never maps to lower range', () => { + const data = [3, 7, 15, 22, 40, 55, 70, 88, 95]; + const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]); + const sorted = [...data].sort((a, b) => a - b); + + for (let i = 1; i < sorted.length; i++) { + expect(scale(sorted[i])).toBeGreaterThanOrEqual(scale(sorted[i - 1])); + } + }); + + it('domain and range getters return current values', () => { + const d = [1, 2, 3]; + const r = [10, 20, 30]; + const scale = scaleCluster().domain(d).range(r); + expect(scale.domain()).toEqual(d); + expect(scale.range()).toEqual(r); + }); +}); diff --git a/src/components/sunburst/sunburst-container.jsx b/src/components/sunburst/sunburst-container.jsx index 290b790..ff067a1 100644 --- a/src/components/sunburst/sunburst-container.jsx +++ b/src/components/sunburst/sunburst-container.jsx @@ -1,6 +1,6 @@ import React from 'react'; import D3Sunburst from './d3-sunburst.jsx'; -import scaleCluster from 'd3-scale-cluster'; +import scaleCluster from '../../js/scale-cluster.js'; import '../../css/diffgraph.css'; import { similarityWithDistance, checkResponse, getUTCDateFormat } from '../../js/utils.js'; diff --git a/src/js/scale-cluster.js b/src/js/scale-cluster.js new file mode 100644 index 0000000..ab2ea1b --- /dev/null +++ b/src/js/scale-cluster.js @@ -0,0 +1,176 @@ +/** + * scaleCluster - a self-contained reimplementation of d3-scale-cluster. + * + * Uses the Ckmeans.1d.dp algorithm (optimal 1-D k-means clustering via + * dynamic programming) originally by Haizhou Wang and Mingzhou Song. + * This ensures our clusters are mathematically identical to the original + * npm package we replaced, with zero regressions. + */ + +function numericSort(array) { + return array.slice().sort((a, b) => a - b); +} + +function uniqueCountSorted(sorted) { + let count = 0; + let last; + for (let i = 0; i < sorted.length; i++) { + if (i === 0 || sorted[i] !== last) { + last = sorted[i]; + count++; + } + } + return count; +} + +function makeMatrix(columns, rows) { + const matrix = []; + for (let i = 0; i < columns; i++) { + matrix.push(new Array(rows).fill(0)); + } + return matrix; +} + +function ssq(j, i, sumX, sumXsq) { + let sji; + if (j > 0) { + const muji = (sumX[i] - sumX[j - 1]) / (i - j + 1); + sji = sumXsq[i] - sumXsq[j - 1] - (i - j + 1) * muji * muji; + } else { + sji = sumXsq[i] - (sumX[i] * sumX[i]) / (i + 1); + } + return sji < 0 ? 0 : sji; +} + +function fillMatrixColumn(imin, imax, column, matrix, backtrackMatrix, sumX, sumXsq) { + if (imin > imax) return; + + const i = Math.floor((imin + imax) / 2); + + matrix[column][i] = matrix[column - 1][i - 1]; + backtrackMatrix[column][i] = i; + + let jlow = column; + if (imin > column) { + jlow = Math.max(jlow, backtrackMatrix[column][imin - 1] || 0); + } + jlow = Math.max(jlow, backtrackMatrix[column - 1][i] || 0); + + let jhigh = i - 1; + if (imax < matrix[0].length - 1) { + jhigh = Math.min(jhigh, backtrackMatrix[column][imax + 1] || 0); + } + + for (let j = jhigh; j >= jlow; --j) { + const sji = ssq(j, i, sumX, sumXsq); + + if (sji + matrix[column - 1][jlow - 1] >= matrix[column][i]) break; + + const sjlowi = ssq(jlow, i, sumX, sumXsq); + const ssqjlow = sjlowi + matrix[column - 1][jlow - 1]; + + if (ssqjlow < matrix[column][i]) { + matrix[column][i] = ssqjlow; + backtrackMatrix[column][i] = jlow; + } + jlow++; + + const ssqj = sji + matrix[column - 1][j - 1]; + if (ssqj < matrix[column][i]) { + matrix[column][i] = ssqj; + backtrackMatrix[column][i] = j; + } + } + + fillMatrixColumn(imin, i - 1, column, matrix, backtrackMatrix, sumX, sumXsq); + fillMatrixColumn(i + 1, imax, column, matrix, backtrackMatrix, sumX, sumXsq); +} + +function ckmeans(data, nClusters) { + if (nClusters > data.length) { + throw new Error('Cannot generate more classes than there are data values'); + } + + const sorted = numericSort(data); + const uniqueCount = uniqueCountSorted(sorted); + + if (uniqueCount === 1) return [sorted[0]]; + + nClusters = Math.min(uniqueCount, nClusters); + + const matrix = makeMatrix(nClusters, sorted.length); + const backtrackMatrix = makeMatrix(nClusters, sorted.length); + + const nValues = sorted.length; + const sumX = new Array(nValues); + const sumXsq = new Array(nValues); + const shift = sorted[Math.floor(nValues / 2)]; + + for (let i = 0; i < nValues; ++i) { + if (i === 0) { + sumX[0] = sorted[0] - shift; + sumXsq[0] = (sorted[0] - shift) ** 2; + } else { + sumX[i] = sumX[i - 1] + sorted[i] - shift; + sumXsq[i] = sumXsq[i - 1] + (sorted[i] - shift) ** 2; + } + matrix[0][i] = ssq(0, i, sumX, sumXsq); + backtrackMatrix[0][i] = 0; + } + + for (let k = 1; k < matrix.length; ++k) { + const imin = k < matrix.length - 1 ? k : nValues - 1; + fillMatrixColumn(imin, nValues - 1, k, matrix, backtrackMatrix, sumX, sumXsq); + } + + const clusters = []; + let clusterRight = backtrackMatrix[0].length - 1; + + for (let cluster = backtrackMatrix.length - 1; cluster >= 0; cluster--) { + const clusterLeft = backtrackMatrix[cluster][clusterRight]; + clusters[cluster] = sorted[clusterLeft]; + if (cluster > 0) clusterRight = clusterLeft - 1; + } + + return clusters; +} + +export default function scaleCluster() { + let domain = []; + let range = []; + let breakpoints = []; + + const scale = (x) => { + if (!breakpoints.length) return undefined; + for (let i = breakpoints.length - 1; i >= 0; i--) { + if (x >= breakpoints[i]) return range[i]; + } + return range[0]; + }; + + function rescale() { + if (range.length <= 2) return; + const clusters = ckmeans(domain, Math.min(domain.length, range.length)); + breakpoints = clusters.slice(); // store full cluster mins + } + + scale.domain = function (arr) { + if (!arguments.length) return domain; + domain = arr; + rescale(); + return scale; + }; + + scale.range = function (arr) { + if (!arguments.length) return range; + range = arr; + rescale(); + return scale; + }; + + scale.clusters = function () { + return breakpoints.slice(1); // emulate d3-scale-cluster's cluster method + }; + + return scale; +} diff --git a/yarn.lock b/yarn.lock index a58a843..eac8f12 100644 --- a/yarn.lock +++ b/yarn.lock @@ -2345,11 +2345,6 @@ cjs-module-lexer@^2.1.0: resolved "https://registry.yarnpkg.com/cjs-module-lexer/-/cjs-module-lexer-2.1.1.tgz#bff23b0609cc9afa428bd35f1918f7d03b448562" integrity sha512-+CmxIZ/L2vNcEfvNtLdU0ZQ6mbq3FZnwAP2PPTiKP+1QOoKwlKlPgb8UKV0Dds7QVaMnHm+FwSft2VB0s/SLjQ== -ckmeans@^2.0.1: - version "2.1.0" - resolved "https://registry.yarnpkg.com/ckmeans/-/ckmeans-2.1.0.tgz#ad981cbb1af8da79bebbe82fca441ef1dc0b2fe7" - integrity sha512-xLKsOWXy2QqD8kzyddAV3N87dWZk1sscaUjJmjUccJ7OMddCYJKGR+ff2QIu3sN5uhmuB7prtmYLLV5t6V1cYw== - cliui@^8.0.1: version "8.0.1" resolved "https://registry.yarnpkg.com/cliui/-/cliui-8.0.1.tgz#0c04b075db02cbfe60dc8e6cf2f5486b1a3608aa" @@ -2779,13 +2774,6 @@ d3-scale-chromatic@3: d3-color "1 - 3" d3-interpolate "1 - 3" -d3-scale-cluster@^2.0.1: - version "2.0.1" - resolved "https://registry.yarnpkg.com/d3-scale-cluster/-/d3-scale-cluster-2.0.1.tgz#7ebfc146204b1d96a129903ead9dfc4d2d781191" - integrity sha512-1ll05baOigYr4rh4uIE15J4DA+JnGy8HutZUvj6VcBJmHrbEsmTAnwdb1KrxX/bT9cS7oxG/w5ONFtOP2pSKrQ== - dependencies: - ckmeans "^2.0.1" - d3-scale@4: version "4.0.2" resolved "https://registry.yarnpkg.com/d3-scale/-/d3-scale-4.0.2.tgz#82b38e8e8ff7080764f8dcec77bd4be393689396" From 89c83ac259437818d8fb2367585d3f861887c150 Mon Sep 17 00:00:00 2001 From: Hasnain Habib Sayed Date: Sat, 14 Mar 2026 20:11:40 +0600 Subject: [PATCH 2/3] fix comprehensive test suite --- .../__tests__/scale-cluster.test.js | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/components/__tests__/scale-cluster.test.js b/src/components/__tests__/scale-cluster.test.js index 532d24e..8644a49 100644 --- a/src/components/__tests__/scale-cluster.test.js +++ b/src/components/__tests__/scale-cluster.test.js @@ -1,7 +1,7 @@ import scaleCluster from '../../js/scale-cluster'; -describe('scaleCluster', () => { - it('computes clusters correctly for well-separated groups', () => { +describe('scaleCluster', function () { + it('computes clusters correctly for well-separated groups', function () { const data = [1, 2, 10, 11, 20]; const scale = scaleCluster().domain(data).range([1, 2, 3]); @@ -16,13 +16,13 @@ describe('scaleCluster', () => { expect(scale(20)).toBe(3); }); - it('handles the sunburst 5-range use case', () => { + it('handles the sunburst 5-range use case', function () { const data = [5, 12, 15, 30, 32, 60, 61, 80, 95]; const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]); const clusters = scale.clusters(); expect(clusters.length).toBe(4); - clusters.forEach(c => expect(typeof c).toBe('number')); + clusters.forEach(function (c) { expect(typeof c).toBe('number'); }); for (const val of data) { const result = scale(val); @@ -31,19 +31,19 @@ describe('scaleCluster', () => { } }); - it('returns single breakpoint-free cluster when all values identical', () => { + it('returns single breakpoint-free cluster when all values identical', function () { const scale = scaleCluster().domain([7, 7, 7, 7]).range([1, 2, 3]); expect(scale.clusters()).toEqual([]); expect(scale(7)).toBe(1); }); - it('handles a single data point', () => { + it('handles a single data point', function () { const scale = scaleCluster().domain([42]).range([1, 2, 3]); expect(scale.clusters()).toEqual([]); expect(scale(42)).toBe(1); }); - it('reduces cluster count when fewer unique values than range buckets', () => { + it('reduces cluster count when fewer unique values than range buckets', function () { const scale = scaleCluster().domain([1, 1, 50, 50]).range([1, 2, 3, 4, 5]); const clusters = scale.clusters(); @@ -53,7 +53,7 @@ describe('scaleCluster', () => { expect(scale(50)).toBe(2); }); - it('works with unsorted input', () => { + it('works with unsorted input', function () { const scale = scaleCluster().domain([20, 1, 11, 2, 10]).range([1, 2, 3]); expect(scale.clusters()).toEqual([10, 20]); expect(scale(2)).toBe(1); @@ -61,7 +61,7 @@ describe('scaleCluster', () => { expect(scale(20)).toBe(3); }); - it('handles negative and mixed-sign values', () => { + it('handles negative and mixed-sign values', function () { const data = [-50, -48, 0, 1, 100, 99]; const scale = scaleCluster().domain(data).range([1, 2, 3]); const clusters = scale.clusters(); @@ -75,7 +75,7 @@ describe('scaleCluster', () => { expect(scale(100)).toBe(3); }); - it('handles floating-point similarity percentages', () => { + it('handles floating-point similarity percentages', function () { const data = [0.5, 1.2, 1.3, 50.7, 51.1, 99.9]; const scale = scaleCluster().domain(data).range([1, 2, 3]); const clusters = scale.clusters(); @@ -86,17 +86,17 @@ describe('scaleCluster', () => { } }); - it('preserves monotonicity: higher input never maps to lower range', () => { + it('preserves monotonicity: higher input never maps to lower range', function () { const data = [3, 7, 15, 22, 40, 55, 70, 88, 95]; const scale = scaleCluster().domain(data).range([1, 2, 3, 4, 5]); - const sorted = [...data].sort((a, b) => a - b); + const sorted = [...data].sort(function (a, b) { return a - b; }); for (let i = 1; i < sorted.length; i++) { expect(scale(sorted[i])).toBeGreaterThanOrEqual(scale(sorted[i - 1])); } }); - it('domain and range getters return current values', () => { + it('domain and range getters return current values', function () { const d = [1, 2, 3]; const r = [10, 20, 30]; const scale = scaleCluster().domain(d).range(r); From 7ea35ba8df3e030ca4bd875f4cc9272b89f03268 Mon Sep 17 00:00:00 2001 From: Hasnain Habib Sayed Date: Sat, 11 Apr 2026 22:36:02 +0000 Subject: [PATCH 3/3] Add tests for scaleCluster behavior with domain and range adjustments --- src/components/__tests__/scale-cluster.test.js | 17 +++++++++++++++++ src/js/scale-cluster.js | 9 ++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/src/components/__tests__/scale-cluster.test.js b/src/components/__tests__/scale-cluster.test.js index 8644a49..2327e85 100644 --- a/src/components/__tests__/scale-cluster.test.js +++ b/src/components/__tests__/scale-cluster.test.js @@ -103,4 +103,21 @@ describe('scaleCluster', function () { expect(scale.domain()).toEqual(d); expect(scale.range()).toEqual(r); }); + + it('does not throw when range is set before domain or domain is empty', function () { + const scale = scaleCluster().range([1, 2, 3, 4, 5]); + expect(scale.clusters()).toEqual([]); + expect(scale(0)).toBeUndefined(); + scale.domain([10, 20, 30]); + expect(scale.clusters().length).toBeGreaterThanOrEqual(0); + expect(scale(20)).toBeDefined(); + }); + + it('clears breakpoints when range shrinks to two or fewer buckets', function () { + const scale = scaleCluster().domain([1, 2, 10, 20]).range([1, 2, 3]); + expect(scale.clusters().length).toBeGreaterThan(0); + scale.range([1, 2]); + expect(scale.clusters()).toEqual([]); + expect(scale(10)).toBeUndefined(); + }); }); diff --git a/src/js/scale-cluster.js b/src/js/scale-cluster.js index ab2ea1b..acffe5f 100644 --- a/src/js/scale-cluster.js +++ b/src/js/scale-cluster.js @@ -149,7 +149,14 @@ export default function scaleCluster() { }; function rescale() { - if (range.length <= 2) return; + if (range.length <= 2) { + breakpoints = []; + return; + } + if (!domain.length) { + breakpoints = []; + return; + } const clusters = ckmeans(domain, Math.min(domain.length, range.length)); breakpoints = clusters.slice(); // store full cluster mins }