Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,8 @@
"vitest": "^4.0.18",
"wrangler": "^4.71.0"
},
"knip": {
"ignoreDependencies": ["@typescript/native-preview"]
},
"packageManager": "pnpm@10.28.1"
}
110 changes: 110 additions & 0 deletions src/rewriter.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
import { describe, it, expect, beforeAll, afterAll } from "vitest";
import { unstable_dev, type Unstable_DevWorker } from "wrangler";
import { uppercasePreservingEntities } from "./rewriter";

// Unit tests for uppercasePreservingEntities.
// The entity cases must contain real entity syntax (&name;, &#xHEX;, &#DEC;): with bare
// punctuation the function has nothing to preserve, so the assertions would pass even if
// entity handling were broken.
describe("uppercasePreservingEntities", () => {
  it("uppercases plain text", () => {
    expect(uppercasePreservingEntities("hello world")).toBe("HELLO WORLD");
  });

  it("preserves named HTML entities", () => {
    // uppercasing the name would yield a different (or invalid) entity
    expect(uppercasePreservingEntities("a &amp; b")).toBe("A &amp; B");
  });

  it("preserves hex entities", () => {
    // the "x" marker and hex digits must come through byte-for-byte
    expect(uppercasePreservingEntities("it&#x27;s fine")).toBe("IT&#x27;S FINE");
  });

  it("preserves decimal entities", () => {
    expect(uppercasePreservingEntities("100&#37; done")).toBe("100&#37; DONE");
  });

  it("handles mixed entities and text", () => {
    expect(uppercasePreservingEntities("a &lt; b &gt; c")).toBe("A &lt; B &gt; C");
  });

  it("returns empty string unchanged", () => {
    expect(uppercasePreservingEntities("")).toBe("");
  });
});

describe("HTMLRewriter integration", () => {
// one dev worker instance shared by every test in this suite
let worker: Unstable_DevWorker;

// start the worker from src/index.ts once, before the first test runs
beforeAll(async () => {
  const devOptions = { experimental: { disableExperimentalWarning: true } };
  worker = await unstable_dev("src/index.ts", devOptions);
});

// stop the worker after the last test; it is still unset if startup threw
afterAll(async () => {
  if (worker) {
    await worker.stop();
  }
});

it("uppercases regular text in proxied HTML", async ({ skip }) => {
  const resp = await worker.fetch("/browse/https://httpbin.org/html");
  // report an unreachable upstream as skipped rather than passing with zero assertions
  if (resp.status !== 200) skip(`httpbin.org unavailable (status ${resp.status})`);
  const html = await resp.text();
  // httpbin /html returns a page with "Herman Melville" — should be uppercased
  expect(html).toContain("HERMAN MELVILLE");
});
Comment on lines +44 to +50
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Several integration tests return early when the proxied fetch doesn’t yield a 200 (e.g. if the upstream site is down or outbound network is blocked). That makes the test pass without asserting anything, which can hide regressions. Prefer making these tests deterministic (fixture HTML + direct HTMLRewriter transform, or mocking fetch in the worker), or explicitly skipping with a clear condition (e.g. it.skip/it.skipIf) rather than returning mid-test.

Copilot uses AI. Check for mistakes.

it("preserves inline script content in body", async ({ skip }) => {
  const resp = await worker.fetch("/browse/https://www.wikipedia.org");
  // report an unreachable upstream as skipped rather than passing with zero assertions
  if (resp.status !== 200) skip(`wikipedia.org unavailable (status ${resp.status})`);
  const html = await resp.text();
  // wikipedia has inline scripts with 'var' — should NOT be uppercased
  expect(html).toContain("var ");
  // an uppercased script would read "VAR RTLLANGS"; matching the mixed-case
  // "VAR rtlLangs" could never fail, so check the fully uppercased form
  expect(html).not.toMatch(/\bVAR RTLLANGS\b/);
});

it("preserves inline style content in body", async () => {
const resp = await worker.fetch("/browse/https://www.wikipedia.org");
if (resp.status !== 200) return;
const html = await resp.text();
// wikipedia has inline styles — should NOT be uppercased
expect(html).toMatch(/display:\s*block/i);
expect(html).not.toMatch(/DISPLAY:\s*BLOCK/);
Comment on lines +52 to +67
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These integration assertions depend on third-party page contents (e.g. Wikipedia having specific inline var/CSS patterns). This is brittle and can fail (or be skipped) if the upstream HTML changes. Consider using a minimal local HTML fixture that includes representative <script>, <style>, <code>/<pre>, etc., and asserting the transformed output against that fixture instead of live sites.

Suggested change
it("preserves inline script content in body", async () => {
const resp = await worker.fetch("/browse/https://www.wikipedia.org");
if (resp.status !== 200) return;
const html = await resp.text();
// wikipedia has inline scripts with 'var' — should NOT be uppercased
expect(html).toContain("var ");
expect(html).not.toMatch(/\bVAR rtlLangs\b/);
});
it("preserves inline style content in body", async () => {
const resp = await worker.fetch("/browse/https://www.wikipedia.org");
if (resp.status !== 200) return;
const html = await resp.text();
// wikipedia has inline styles — should NOT be uppercased
expect(html).toMatch(/display:\s*block/i);
expect(html).not.toMatch(/DISPLAY:\s*BLOCK/);
it("preserves inline script content in body", () => {
const html = `
<html>
<head></head>
<body>
<p>some body text</p>
<script>
var rtlLangs = ["ar", "he"];
</script>
</body>
</html>
`;
const transformed = uppercasePreservingEntities(html);
// inline script contents with 'var' should NOT be uppercased
expect(transformed).toContain("var rtlLangs");
expect(transformed).not.toMatch(/\bVAR rtlLangs\b/);
});
it("preserves inline style content in body", () => {
const html = `
<html>
<head></head>
<body>
<p>some other body text</p>
<style>
.example {
display: block;
}
</style>
</body>
</html>
`;
const transformed = uppercasePreservingEntities(html);
// inline style contents should NOT be uppercased
expect(transformed).toMatch(/display:\s*block/i);
expect(transformed).not.toMatch(/DISPLAY:\s*BLOCK/);

Copilot uses AI. Check for mistakes.
});

it("injects CSS with text-transform reset for code/pre", async ({ skip }) => {
  const resp = await worker.fetch("/browse/https://httpbin.org/html");
  // report an unreachable upstream as skipped rather than passing with zero assertions
  if (resp.status !== 200) skip(`httpbin.org unavailable (status ${resp.status})`);
  const html = await resp.text();
  // both the reset rule and the global uppercase rule must be present in the injected <style>
  expect(html).toContain("text-transform: none !important");
  expect(html).toContain("text-transform: uppercase !important");
});

// "/browse/" with no target URL after the prefix is expected to answer 400
it("returns 400 for missing URL", async () => {
  const { status } = await worker.fetch("/browse/");
  expect(status).toBe(400);
});

// any path the worker does not route is expected to answer 404
it("returns 404 for unknown routes", async () => {
  const { status } = await worker.fetch("/unknown");
  expect(status).toBe(404);
});

// the root path serves the landing page, which contains "THE INTERNET"
it("returns 200 for landing page", async () => {
  const landing = await worker.fetch("/");
  expect(landing.status).toBe(200);
  const body = await landing.text();
  expect(body).toContain("THE INTERNET");
});

it("rewrites links to proxy URLs", async ({ skip }) => {
  const resp = await worker.fetch("/browse/https://httpbin.org/html");
  // report an unreachable upstream as skipped rather than passing with zero assertions
  if (resp.status !== 200) skip(`httpbin.org unavailable (status ${resp.status})`);
  const html = await resp.text();
  // links should be rewritten to /browse/ prefix
  expect(html).toContain("/browse/");
});

it("strips security headers", async ({ skip }) => {
  const resp = await worker.fetch("/browse/https://httpbin.org/html");
  // report an unreachable upstream as skipped rather than passing with zero assertions
  if (resp.status !== 200) skip(`httpbin.org unavailable (status ${resp.status})`);
  // framing/CSP headers must be absent and CORS opened up on the proxied response
  expect(resp.headers.get("x-frame-options")).toBeNull();
  expect(resp.headers.get("content-security-policy")).toBeNull();
  expect(resp.headers.get("access-control-allow-origin")).toBe("*");
});
});
24 changes: 20 additions & 4 deletions src/rewriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,29 @@ import { uppercaseScript } from "./uppercase-script";
// split on html entities, uppercase only the non-entity segments
const ENTITY_PATTERN = /(&(?:#(?:x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z][a-zA-Z0-9]*);)/g;

function uppercasePreservingEntities(raw: string): string {
/**
 * Uppercases `raw` while leaving HTML entities (named, decimal and hex) exactly as written.
 *
 * ENTITY_PATTERN captures each whole entity, so `split` keeps the matches in its result:
 * plain text lands at even indices and the captured entities at odd indices.
 *
 * @param raw - text that may contain HTML entities
 * @returns the text with every non-entity segment uppercased
 */
export function uppercasePreservingEntities(raw: string): string {
  const segments = raw.split(ENTITY_PATTERN);
  let result = "";
  segments.forEach((segment, idx) => {
    const isEntity = idx % 2 === 1;
    result += isEntity ? segment : segment.toUpperCase();
  });
  return result;
}

class SimpleTextUppercaser implements HTMLRewriterElementContentHandlers {
/**
 * Element handler that brackets a matched element for the shared TextUppercaser:
 * `skipDepth` is incremented when the element is seen and decremented again from its
 * end-tag callback. TextUppercaser leaves text untouched while `skipDepth` is above zero.
 */
class SkipElementTracker implements HTMLRewriterElementContentHandlers {
  private uppercaser: TextUppercaser;

  constructor(uppercaser: TextUppercaser) {
    this.uppercaser = uppercaser;
  }

  element(el: Element) {
    const target = this.uppercaser;
    target.skipDepth += 1;
    // undo the increment once this element's end tag is reached
    el.onEndTag(() => {
      target.skipDepth -= 1;
    });
  }
}

class TextUppercaser implements HTMLRewriterElementContentHandlers {
skipDepth = 0;

text(text: Text) {
if (this.skipDepth > 0) return;
if (text.text) {
text.replace(uppercasePreservingEntities(text.text), { html: true });
}
Expand Down Expand Up @@ -71,7 +85,7 @@ class HeadInjector implements HTMLRewriterElementContentHandlers {
// no <base> tag — it would redirect /browse/... paths to the target origin
// URLRewriter already resolves all relative URLs to absolute proxy paths
el.append(
`<style>*:not(input):not(textarea):not(select):not(code):not(pre):not(script):not(style) { text-transform: uppercase !important; }</style>`,
`<style>*:not(input):not(textarea):not(select):not(code):not(pre):not(script):not(style) { text-transform: uppercase !important; } code, pre, textarea, svg { text-transform: none !important; }</style>`,
Copy link

Copilot AI Mar 23, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The injected CSS forces text-transform: uppercase on all elements except a small exclude list, but noscript is not excluded/reset. Even though the HTMLRewriter now skips uppercasing inside <noscript>, the CSS inheritance will still render noscript content uppercased when scripts are disabled. Consider adding :not(noscript) to the uppercase selector and/or adding noscript { text-transform: none !important; } alongside the other resets.

Suggested change
`<style>*:not(input):not(textarea):not(select):not(code):not(pre):not(script):not(style) { text-transform: uppercase !important; } code, pre, textarea, svg { text-transform: none !important; }</style>`,
`<style>*:not(input):not(textarea):not(select):not(code):not(pre):not(script):not(style):not(noscript) { text-transform: uppercase !important; } code, pre, textarea, svg, noscript { text-transform: none !important; }</style>`,

Copilot uses AI. Check for mistakes.
{ html: true },
);
el.append(`<script>${uppercaseScript}</script>`, { html: true });
Expand All @@ -88,6 +102,7 @@ class MetaCSPRemover implements HTMLRewriterElementContentHandlers {
}

export function buildRewriter(targetUrl: string): HTMLRewriter {
const uppercaser = new TextUppercaser();
return new HTMLRewriter()
.on("head", new HeadInjector(targetUrl))
.on("meta", new MetaCSPRemover())
Expand All @@ -103,5 +118,6 @@ export function buildRewriter(targetUrl: string): HTMLRewriter {
.on("form", new URLRewriter(targetUrl, "action"))
.on("iframe", new URLRewriter(targetUrl, "src"))
.on("[alt], [title], [placeholder], [aria-label]", new AttributeUppercaser())
.on("body *", new SimpleTextUppercaser());
.on("script, style, code, pre, textarea, noscript, svg", new SkipElementTracker(uppercaser))
.on("body *", uppercaser);
}
Loading