diff --git a/package.json b/package.json
index 51f9fd1..acc1592 100644
--- a/package.json
+++ b/package.json
@@ -21,5 +21,8 @@
     "vitest": "^4.0.18",
     "wrangler": "^4.71.0"
   },
+  "knip": {
+    "ignoreDependencies": ["@typescript/native-preview"]
+  },
   "packageManager": "pnpm@10.28.1"
 }
diff --git a/src/rewriter.test.ts b/src/rewriter.test.ts
new file mode 100644
index 0000000..7620094
--- /dev/null
+++ b/src/rewriter.test.ts
@@ -0,0 +1,110 @@
+import { describe, it, expect, beforeAll, afterAll } from "vitest";
+import { unstable_dev, type Unstable_DevWorker } from "wrangler";
+import { uppercasePreservingEntities } from "./rewriter";
+
+describe("uppercasePreservingEntities", () => {
+  it("uppercases plain text", () => {
+    expect(uppercasePreservingEntities("hello world")).toBe("HELLO WORLD");
+  });
+
+  it("preserves named HTML entities", () => {
+    expect(uppercasePreservingEntities("a &amp; b")).toBe("A &amp; B");
+  });
+
+  it("preserves hex entities", () => {
+    expect(uppercasePreservingEntities("it&#x27;s fine")).toBe("IT&#x27;S FINE");
+  });
+
+  it("preserves decimal entities", () => {
+    expect(uppercasePreservingEntities("100&#37; done")).toBe("100&#37; DONE");
+  });
+
+  it("handles mixed entities and text", () => {
+    expect(uppercasePreservingEntities("a &lt; b &gt; c")).toBe("A &lt; B &gt; C");
+  });
+
+  it("returns empty string unchanged", () => {
+    expect(uppercasePreservingEntities("")).toBe("");
+  });
+});
+
+describe("HTMLRewriter integration", () => {
+  let worker: Unstable_DevWorker;
+
+  beforeAll(async () => {
+    worker = await unstable_dev("src/index.ts", {
+      experimental: { disableExperimentalWarning: true },
+    });
+  });
+
+  afterAll(async () => {
+    await worker?.stop();
+  });
+
+  it("uppercases regular text in proxied HTML", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://httpbin.org/html");
+    if (resp.status !== 200) skip("httpbin.org unavailable"); // reported as skipped, not passed
+    const html = await resp.text();
+    // httpbin /html returns a page with "Herman Melville" — should be uppercased
+    expect(html).toContain("HERMAN MELVILLE");
+  });
+
+  it("preserves inline script content in body", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://www.wikipedia.org");
+    if (resp.status !== 200) skip("wikipedia.org unavailable");
+    const html = await resp.text();
+    // wikipedia has inline scripts with 'var' — should NOT be uppercased
+    expect(html).toContain("var ");
+    expect(html).not.toMatch(/\bVAR rtlLangs\b/);
+  });
+
+  it("preserves inline style content in body", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://www.wikipedia.org");
+    if (resp.status !== 200) skip("wikipedia.org unavailable");
+    const html = await resp.text();
+    // wikipedia has inline styles — should NOT be uppercased
+    expect(html).toMatch(/display:\s*block/i);
+    expect(html).not.toMatch(/DISPLAY:\s*BLOCK/);
+  });
+
+  it("injects CSS with text-transform reset for code/pre", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://httpbin.org/html");
+    if (resp.status !== 200) skip("httpbin.org unavailable");
+    const html = await resp.text();
+    expect(html).toContain("text-transform: none !important");
+    expect(html).toContain("text-transform: uppercase !important");
+  });
+
+  it("returns 400 for missing URL", async () => {
+    const resp = await worker.fetch("/browse/");
+    expect(resp.status).toBe(400);
+  });
+
+  it("returns 404 for unknown routes", async () => {
+    const resp = await worker.fetch("/unknown");
+    expect(resp.status).toBe(404);
+  });
+
+  it("returns 200 for landing page", async () => {
+    const resp = await worker.fetch("/");
+    expect(resp.status).toBe(200);
+    const html = await resp.text();
+    expect(html).toContain("THE INTERNET");
+  });
+
+  it("rewrites links to proxy URLs", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://httpbin.org/html");
+    if (resp.status !== 200) skip("httpbin.org unavailable");
+    const html = await resp.text();
+    // links should be rewritten to /browse/ prefix
+    expect(html).toContain("/browse/");
+  });
+
+  it("strips security headers", async ({ skip }) => {
+    const resp = await worker.fetch("/browse/https://httpbin.org/html");
+    if (resp.status !== 200) skip("httpbin.org unavailable");
+    expect(resp.headers.get("x-frame-options")).toBeNull();
+    expect(resp.headers.get("content-security-policy")).toBeNull();
+    expect(resp.headers.get("access-control-allow-origin")).toBe("*");
+  });
+});
diff --git a/src/rewriter.ts b/src/rewriter.ts
index 8e46797..9917f1e 100644
--- a/src/rewriter.ts
+++ b/src/rewriter.ts
@@ -4,15 +4,38 @@ import { uppercaseScript } from "./uppercase-script";
 
 // split on html entities, uppercase only the non-entity segments
 const ENTITY_PATTERN = /(&(?:#(?:x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z][a-zA-Z0-9]*);)/g;
-function uppercasePreservingEntities(raw: string): string {
+/** Uppercases `raw`, leaving named, decimal and hex entities untouched. */
+export function uppercasePreservingEntities(raw: string): string {
   return raw
     .split(ENTITY_PATTERN)
     .map((part, i) => (i % 2 === 0 ? part.toUpperCase() : part))
     .join("");
 }
 
-class SimpleTextUppercaser implements HTMLRewriterElementContentHandlers {
+/**
+ * Counts how many skip-listed elements are currently open and records the
+ * count on the shared TextUppercaser, which leaves text alone while the
+ * count is above zero.
+ */
+class SkipElementTracker implements HTMLRewriterElementContentHandlers {
+  constructor(private readonly uppercaser: TextUppercaser) {}
+
+  element(el: Element): void {
+    this.uppercaser.skipDepth++;
+    // undo the increment once the matching end tag is reached
+    el.onEndTag(() => {
+      this.uppercaser.skipDepth--;
+    });
+  }
+}
+
+/** Uppercases text chunks unless a skip-listed element is currently open. */
+class TextUppercaser implements HTMLRewriterElementContentHandlers {
+  /** Number of open skip-listed elements; maintained by SkipElementTracker. */
+  skipDepth = 0;
+
   text(text: Text) {
+    if (this.skipDepth > 0) return;
     if (text.text) {
       text.replace(uppercasePreservingEntities(text.text), { html: true });
     }
@@ -71,7 +94,7 @@ class HeadInjector implements HTMLRewriterElementContentHandlers {
     // no tag — it would redirect /browse/... paths to the target origin
     // URLRewriter already resolves all relative URLs to absolute proxy paths
     el.append(
-      ``,
+      ``,
       { html: true },
     );
     el.append(``, { html: true });
@@ -88,6 +111,8 @@ class MetaCSPRemover implements HTMLRewriterElementContentHandlers {
 }
 
 export function buildRewriter(targetUrl: string): HTMLRewriter {
+  // one uppercaser per rewriter, shared with the tracker registered below
+  const uppercaser = new TextUppercaser();
   return new HTMLRewriter()
     .on("head", new HeadInjector(targetUrl))
     .on("meta", new MetaCSPRemover())
@@ -103,5 +128,6 @@ export function buildRewriter(targetUrl: string): HTMLRewriter {
     .on("form", new URLRewriter(targetUrl, "action"))
     .on("iframe", new URLRewriter(targetUrl, "src"))
     .on("[alt], [title], [placeholder], [aria-label]", new AttributeUppercaser())
-    .on("body *", new SimpleTextUppercaser());
+    .on("script, style, code, pre, textarea, noscript, svg", new SkipElementTracker(uppercaser))
+    .on("body *", uppercaser);
 }