-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfix_encoding.py
More file actions
59 lines (52 loc) · 1.72 KB
/
fix_encoding.py
File metadata and controls
59 lines (52 loc) · 1.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
#!/usr/bin/env python3
import os
# Double-encoded UTF-8 sequences and their replacements (ASCII when sensible).
# Entries are applied in the order listed.
# NOTE(review): the labels below are the original author's. Some sequences do
# not look like the plain cp1252 mojibake of the named character (e.g. the
# third "em dash" entry matches a mis-decoded U+201C left double quote, and
# the "check mark" entry uses \u0178 where U+2713 would give \u0153) --
# verify against the actual corrupted files before relying on the labels.
_MOJIBAKE_FIXES = (
    # Em dash variants
    ('\u00e2\u20ac\u201d', '-'),
    ('\u00e2\u20ac\u201c', '-'),
    ('\u00e2\u20ac\u0153', '-'),
    # Bullet
    ('\u00e2\u20ac\u00a2', '-'),
    # Right single quote
    ('\u00e2\u20ac\u2122', "'"),
    # Arrow
    ('\u00e2\u2020\u2019', '->'),
    # Check mark
    ('\u00e2\u0178\u201c', 'X'),
    # Money emoji
    ('\u00f0\u0178\u2019\u00b8', '$'),
    # Zombie emoji
    ('\u00f0\u0178\u00a7\u0178', 'Z'),
    # Family emoji
    ('\u00f0\u0178\u2019\u00a8\u00e2\u20ac\u00a0\u00f0\u0178\u2019\u00a9\u00e2\u20ac\u00a0\u00f0\u0178\u2019\u00a7', 'FAM'),
    # Robot emoji
    ('\u00f0\u0178\u00a4\u2013', 'BOT'),
    # Siren emoji
    ('\u00f0\u0178\u0161\u00a8', '!'),
    # Email emoji
    ('\u00f0\u0178\u201c\u00a7', '@'),
    # Timer emoji
    ('\u00e2\u00b1\u00ef\u00b8\u008f', 'T'),
)


def fix_file(filepath):
    """Replace known corrupted character sequences in one UTF-8 text file.

    The file is read as UTF-8, every sequence listed in ``_MOJIBAKE_FIXES``
    is replaced, and the file is rewritten in place only if something
    actually changed. Progress is reported on stdout.

    Args:
        filepath: Path of the file to process.

    Returns:
        True if the file was rewritten, False if it needed no changes or
        could not be decoded as UTF-8 (in which case it is left untouched).

    Raises:
        OSError: If the file cannot be opened for reading or writing.
    """
    print(f"Processing: {filepath}")

    try:
        # newline='' disables newline translation so that CRLF files keep
        # their line endings when written back.
        with open(filepath, 'r', encoding='utf-8', newline='') as f:
            original = f.read()
    except UnicodeDecodeError as exc:
        # One undecodable file should not abort a whole batch run.
        print(f" -> Skipped (not valid UTF-8: {exc})")
        return False

    content = original
    for bad, good in _MOJIBAKE_FIXES:
        content = content.replace(bad, good)

    if content == original:
        print(" -> No changes needed")
        return False

    with open(filepath, 'w', encoding='utf-8', newline='') as f:
        f.write(content)
    print(" -> Fixed!")
    return True
# Driver: visit every directory below the current working directory and run
# fix_file on each file whose name ends with '.html'.
# NOTE(review): this executes at import time as well as when run as a script;
# consider an `if __name__ == "__main__":` guard if the module is ever imported.
for dirpath, _subdirs, names in os.walk('.'):
    for name in names:
        if not name.endswith('.html'):
            continue
        fix_file(os.path.join(dirpath, name))

print("Done!")