-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvalidate_ascii.py
More file actions
81 lines (68 loc) · 3.04 KB
/
validate_ascii.py
File metadata and controls
81 lines (68 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python3
"""
ASCII Validation Script
Ensures all project files contain only ASCII characters (0-127)
"""
import sys
from pathlib import Path
def _describe_non_ascii(char_code):
    """Return a short category label for a non-ASCII code point."""
    if 0x1F600 <= char_code <= 0x1F64F:
        return "Emoticon"
    if 0x1F300 <= char_code <= 0x1F5FF:
        return "Symbol"
    if 0x1F680 <= char_code <= 0x1F6FF:
        return "Transport symbol"
    if 0x2600 <= char_code <= 0x26FF:
        return "Misc symbol"
    # Em-dash, curly single/double quotes, and horizontal ellipsis.
    if char_code in (0x2014, 0x2018, 0x2019, 0x201C, 0x201D, 0x2026):
        return "Smart punctuation"
    return "Non-ASCII character"


def check_file_ascii(filepath):
    """Check whether a file contains only ASCII characters (0-127).

    The file is decoded as UTF-8 rather than ASCII so that offending
    characters can be located and classified. Decoding as ASCII raises on
    the first non-ASCII byte, which means a per-character scan would never
    see anything to report.

    Args:
        filepath: Path (str or path-like) of the file to check.

    Returns:
        ``(True, None)`` when every character is ASCII. Otherwise
        ``(False, message)``, where ``message`` lists up to the first three
        offending characters (line number, category, code point), or holds
        the decoder error text when the file is not valid UTF-8.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # Keep the try body minimal: only reading/decoding can raise here.
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            content = f.read()
    except UnicodeDecodeError as e:
        # Not valid UTF-8, hence certainly not ASCII either.
        return False, str(e)

    found = []
    # Walk line by line so each hit knows its line number without
    # re-counting newlines from the start of the file.
    for line_no, line in enumerate(content.split('\n'), start=1):
        for char in line:
            char_code = ord(char)
            if char_code > 127:
                # Report the code point, not the raw character, so the
                # message stays printable on ASCII-only consoles.
                label = _describe_non_ascii(char_code)
                found.append(f"Line {line_no}: {label} U+{char_code:04X}")

    if found:
        return False, "Emoji/Unicode characters found: " + "; ".join(found[:3])
    return True, None
def main():
    """Check all relevant files under the current directory for ASCII compliance.

    Recursively scans the current working directory for ``*.py``, ``*.md``,
    ``*.txt`` and ``*.json`` entries, skipping anything with a dot-prefixed
    path component, and prints a per-file result followed by a summary.

    Returns:
        True when every checked file passed, False when at least one failed.
    """
    file_patterns = ["*.py", "*.md", "*.txt", "*.json"]
    issues = []
    checked = 0

    print("ASCII Compliance Validator")
    print("=" * 30)

    for pattern in file_patterns:
        for filepath in Path(".").rglob(pattern):
            # Skip hidden files and directories
            if any(part.startswith('.') for part in filepath.parts):
                continue
            # rglob matches on the name only, so a directory (or a broken
            # symlink) named like "data.json" would otherwise reach open()
            # and abort the whole run.
            if not filepath.is_file():
                continue

            checked += 1
            is_ascii, error = check_file_ascii(filepath)
            if is_ascii:
                print(f"OK: {filepath}")
            else:
                print(f"FAIL: {filepath} - {error}")
                issues.append((filepath, error))

    print("\n" + "=" * 30)
    print(f"Checked {checked} files")

    if issues:
        print(f"FAILED: {len(issues)} files contain non-ASCII characters")
        print("\nTo fix:")
        print("1. Replace smart quotes with straight quotes")
        print("2. Replace em-dashes with double hyphens")
        print("3. Replace ellipsis with three dots")
        print("4. Remove other Unicode characters")
        return False

    print("SUCCESS: All files are ASCII-compliant")
    return True
if __name__ == "__main__":
    # Map main()'s boolean result onto the process exit status:
    # 0 when every file passed, 1 otherwise.
    exit_status = 0 if main() else 1
    sys.exit(exit_status)