|
| 1 | +#!/usr/bin/env node |
| 2 | +/** |
| 3 | + * Regenerate src/extensions/tiptap-extensions/Link/helpers/tlds.ts from IANA's |
| 4 | + * authoritative TLD list. |
| 5 | + * |
| 6 | + * Run with: pnpm --filter @blocknote/core update-tlds |
| 7 | + * |
| 8 | + * Encoding format ported from linkifyjs (MIT, https://github.com/nfrasser/linkifyjs): |
| 9 | + * a sorted TLD list is built into a trie, then serialized as an ASCII string |
| 10 | + * where letters descend the trie and digit runs mean "emit a word and pop N |
| 11 | + * levels back up." Shared TLD prefixes (e.g. construction/consulting/ |
| 12 | + * contractors) collapse, producing a payload smaller than a flat list. |
| 13 | + * |
| 14 | + * IDN punycode entries (XN--...) are skipped: the schemeless URL regex in |
| 15 | + * linkDetector.ts requires ASCII-only TLDs, so unicode TLDs would never reach |
| 16 | + * the validation step. |
| 17 | + */ |
| 18 | + |
| 19 | +import { writeFileSync } from "node:fs"; |
| 20 | +import { fileURLToPath } from "node:url"; |
| 21 | +import { dirname, resolve } from "node:path"; |
| 22 | + |
// Plain-text TLD list published by IANA; fetched and parsed line by line in main().
const TLDS_URL = "https://data.iana.org/TLD/tlds-alpha-by-domain.txt";

// ES modules do not provide __dirname, so derive it from this module's URL.
const scriptPath = fileURLToPath(import.meta.url);
const __dirname = dirname(scriptPath);

// Destination of the generated module, resolved relative to this script.
const OUT_PATH = resolve(
  __dirname,
  "../src/extensions/tiptap-extensions/Link/helpers/tlds.ts",
);
| 30 | + |
/**
 * Build a character trie out of the given words.
 *
 * Each node is a plain object whose keys are the next characters; a node that
 * terminates a word additionally carries `isWord: true`.
 *
 * @param {Iterable<string>} words - Words to insert, in the order given.
 * @returns {object} The root node of the trie.
 */
function createTrie(words) {
  const trie = {};
  for (const entry of words) {
    // Walk (and lazily create) one node per character, ending on the leaf.
    const leaf = [...entry].reduce((node, ch) => {
      if (!(ch in node)) {
        node[ch] = {};
      }
      return node[ch];
    }, trie);
    leaf.isWord = true;
  }
  return trie;
}
| 45 | + |
/**
 * Serialize a trie node (and everything below it) into a flat token list.
 *
 * String tokens descend one level into the trie; number tokens mean "the
 * characters accumulated so far form a word, now pop this many levels".
 * Joining the tokens with "" yields the compact encoded form.
 *
 * @param {object} trie - A node as produced by createTrie: single-character
 *   keys map to child nodes, and an `isWord` key marks the end of a word.
 * @returns {Array<string|number>} Tokens for this subtree.
 */
function encodeTrieHelper(trie) {
  const output = [];

  // Emit this node's word marker BEFORE descending into any child. If the
  // marker were emitted in key-enumeration order it could land directly after
  // a child's pop count (when a longer word was inserted before its prefix),
  // and the two adjacent numbers would fuse into one multi-digit pop count
  // once the tokens are joined, silently dropping a word.
  if ("isWord" in trie) {
    output.push(0);
  }

  for (const k in trie) {
    if (k === "isWord") {
      continue;
    }
    output.push(k);
    output.push(...encodeTrieHelper(trie[k]));
    // Account for stepping back out of child `k`: fold the extra level into
    // the trailing pop count when there is one, otherwise start a new count.
    if (typeof output[output.length - 1] === "number") {
      output[output.length - 1] += 1;
    } else {
      output.push(1);
    }
  }
  return output;
}
| 63 | + |
/**
 * Encode a list of TLDs into the compact trie string.
 *
 * Builds a trie from the list with createTrie, flattens it with
 * encodeTrieHelper, and concatenates the resulting tokens.
 *
 * @param {Iterable<string>} tlds - TLDs to encode (main() passes them sorted).
 * @returns {string} The encoded payload.
 */
function encodeTlds(tlds) {
  const trie = createTrie(tlds);
  const tokens = encodeTrieHelper(trie);
  return tokens.join("");
}
| 67 | + |
/**
 * Decode a compact trie string back into its list of words.
 *
 * Non-digit characters are pushed onto a prefix stack. A run of digits means
 * "the current stack spells a word; record it, then pop that many characters".
 *
 * @param {string} encoded - Payload in the format produced by encodeTlds.
 * @returns {string[]} The decoded words, in encounter order.
 */
function decodeTlds(encoded) {
  const decodedWords = [];
  const prefix = [];

  // Tokenize into maximal digit runs (captured) or single non-digit units.
  for (const match of encoded.matchAll(/([0-9]+)|[^0-9]/g)) {
    const digitRun = match[1];
    if (digitRun === undefined) {
      prefix.push(match[0]);
      continue;
    }
    decodedWords.push(prefix.join(""));
    for (let remaining = parseInt(digitRun, 10); remaining > 0; remaining--) {
      prefix.pop();
    }
  }
  return decodedWords;
}
| 92 | + |
/**
 * Fetch the IANA TLD list, encode it, verify the encoding round-trips, and
 * write the generated TypeScript module to OUT_PATH.
 *
 * Comment lines ("#...") and punycode entries ("XN--...") are skipped; the
 * remaining entries are lowercased, de-duplicated and sorted before encoding.
 *
 * @returns {Promise<void>}
 * @throws {Error} If the download fails, the list contains an unexpected
 *   entry, the list is empty, or the encode/decode round-trip does not match.
 */
async function main() {
  console.log(`Fetching ${TLDS_URL}...`);
  const response = await fetch(TLDS_URL);
  if (!response.ok) {
    throw new Error(`Failed to fetch IANA TLDs: ${response.status}`);
  }
  const body = await response.text();

  const candidates = body
    .split("\n")
    .map((line) => line.trim())
    .filter((line) => line && !line.startsWith("#") && !/^XN--/i.test(line))
    .map((line) => line.toLowerCase());

  // The payload is interpolated verbatim into a double-quoted string in the
  // generated source, and digits are reserved by the encoding as pop counts.
  // Refuse anything that is not purely a-z instead of emitting a corrupt file.
  const malformed = candidates.filter((tld) => !/^[a-z]+$/.test(tld));
  if (malformed.length > 0) {
    throw new Error(
      `Unexpected non-alphabetic TLD entries: ${malformed.slice(0, 5).join(", ")}`,
    );
  }

  // De-duplicate so a repeated line cannot cause a spurious round-trip
  // mismatch (the trie stores each word only once).
  const tlds = [...new Set(candidates)].sort();

  // An empty list would round-trip successfully and overwrite the module
  // with an empty payload, so treat it as a failed download.
  if (tlds.length === 0) {
    throw new Error("IANA TLD list contained no usable entries");
  }

  console.log(`Encoding ${tlds.length} TLDs...`);
  const encoded = encodeTlds(tlds);

  console.log("Round-trip asserting...");
  const decoded = decodeTlds(encoded);
  if (JSON.stringify(decoded) !== JSON.stringify(tlds)) {
    throw new Error("Encode/decode round-trip mismatch");
  }

  const fileContents = `// THIS FILE IS AUTO-GENERATED. DO NOT EDIT DIRECTLY.
// Source: ${TLDS_URL}
// Regenerate with: pnpm --filter @blocknote/core update-tlds
// Encoding format ported from linkifyjs (MIT) — trie collapsed into ASCII.

export const ENCODED_TLDS =
  "${encoded}";
`;

  writeFileSync(OUT_PATH, fileContents);
  console.log(
    `Wrote ${OUT_PATH} (${encoded.length} chars, ${tlds.length} TLDs)`,
  );
}
| 131 | + |
// Script entry point: run main() and, on any rejection, report the error and
// exit with a non-zero status so callers (e.g. CI) see the failure.
main().then(undefined, (failure) => {
  console.error(failure);
  process.exit(1);
});
0 commit comments