Basic-HTTP-Proxy-Server-with-Filtering/basic_http_proxy_server.py at main · Samuelson777/Basic-HTTP-Proxy-Server-with-Filtering · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
"""
Basic HTTP Proxy Server with URL Filtering

This script implements a simple HTTP proxy server in Python that listens for HTTP requests,
filters requests based on blocked URL patterns, forwards allowed requests to target servers,
and returns responses back to the client.

Usage:
    python basic_http_proxy_server.py

Default:
    Listens on localhost:8888
    Blocks URLs containing substrings defined in BLOCKLIST

Note:
    - Only GET requests are proxied; HTTPS tunnelling (CONNECT) is NOT supported.
    - For HTTPS, complex SSL/TLS handling is required.
    - This proxy is for educational purposes only.

"""

import html
import http.server
import socketserver
import sys
import threading
import urllib.error
import urllib.request
from urllib.parse import urlparse

# Configuration
# Address and TCP port the proxy server binds to in run_server().
HOST = "localhost"
PORT = 8888

# List of URL substrings to block.
# A GET request is answered with 403 when its URL contains any of these
# entries. This is a substring match, not an exact host comparison, so an
# entry also matches longer host names or paths that contain it.
BLOCKLIST = [
    "ads.example.com",
    "tracking.example",
    "malicious-site.com",
    "phishing-site.net",
]

class ProxyRequestHandler(http.server.BaseHTTPRequestHandler):
    """Request handler implementing a filtering forward proxy for GET requests.

    Each GET request is resolved to an absolute URL, checked against the
    module-level ``BLOCKLIST`` and, if allowed, fetched with ``urllib`` and
    relayed to the client. ``CONNECT`` requests are refused with 501.
    """

    # Seconds to wait for the upstream server before giving up with a 502.
    upstream_timeout = 30

    # Only these schemes are ever handed to urllib. Without this check a
    # request line such as "GET file:///etc/passwd" would make urlopen read
    # a local file and send it to the client.
    _ALLOWED_SCHEMES = frozenset({'http', 'https'})

    # Request headers that are not copied upstream: urllib sets Host itself,
    # and the others are hop-by-hop headers meant for this proxy only.
    _UNFORWARDED_REQUEST_HEADERS = frozenset({
        'host',
        'connection',
        'keep-alive',
        'proxy-connection',
        'proxy-authorization',
        'te',
        'trailer',
        'transfer-encoding',
        'upgrade',
    })

    # Response headers that are not copied to the client: the body is sent
    # as one block with a freshly computed Content-Length. Content-Encoding
    # IS forwarded because the body bytes are relayed unmodified.
    _UNFORWARDED_RESPONSE_HEADERS = frozenset({
        'transfer-encoding',
        'content-length',
        'connection',
        'keep-alive',
    })

    def do_GET(self):
        """Filter and proxy a single GET request.

        Sends 400 for unusable request targets, 403 for blocked URLs, 502
        when the upstream fetch fails, and otherwise relays the upstream
        status, headers and body (including upstream 4xx/5xx responses).
        """
        url = self._target_url()
        if url is None:
            return

        # Check blocklist (case-insensitively: host names are not
        # case-sensitive, so "ADS.EXAMPLE.COM" must not bypass the filter).
        lowered_url = url.lower()
        if any(blocked_substring.lower() in lowered_url
               for blocked_substring in BLOCKLIST):
            self._send_blocked(url)
            return

        # Fetch the complete upstream response BEFORE writing anything to
        # the client, so a failure can still be reported as a clean 502.
        try:
            status, headers, content = self._fetch(url)
        except Exception as e:
            # The detail goes into the response body (HTML-escaped by
            # send_error), not into the status line.
            self.send_error(502, "Bad Gateway", str(e))
            print(f"Error proxying {url}: {e}")
            return

        try:
            self.send_response(status)
            for key, value in headers:
                if key.lower() in self._UNFORWARDED_RESPONSE_HEADERS:
                    continue
                self.send_header(key, value)
            self.send_header('Content-Length', str(len(content)))
            self.end_headers()
            self.wfile.write(content)
        except ConnectionError as e:
            # The client disconnected; there is nobody left to answer.
            print(f"Client disconnected while relaying {url}: {e}")
            return
        print(f"Proxied request: {url}")

    def _target_url(self):
        """Return the absolute URL to fetch, or None after sending a 400."""
        url = self.path
        try:
            parsed_url = urlparse(url)
            # Handle relative URLs (in case browser sends paths only)
            if not parsed_url.scheme:
                host = self.headers.get('Host')
                if not host:
                    self.send_error(400, "Bad Request: Host header missing")
                    return None
                url = "http://" + host + self.path
                parsed_url = urlparse(url)
        except ValueError:
            # urlparse rejects some malformed targets (e.g. a broken IPv6 host).
            self.send_error(400, "Bad Request: malformed URL")
            return None

        if parsed_url.scheme not in self._ALLOWED_SCHEMES or not parsed_url.netloc:
            self.send_error(400, "Bad Request: unsupported URL scheme")
            return None
        return url

    def _send_blocked(self, url):
        """Send the 403 page for a URL that matched the blocklist."""
        # The URL is client-controlled, so escape it before embedding in HTML.
        message = (
            "<html><body><h1>403 Forbidden</h1>"
            f"<p>Access to {html.escape(url)} is blocked by proxy filter.</p>"
            "</body></html>"
        )
        body = message.encode('utf-8')
        self.send_response(403)
        self.send_header('Content-Type', 'text/html; charset=utf-8')
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)
        print(f"Blocked URL access attempt: {url}")

    def _fetch(self, url):
        """Fetch ``url`` upstream and return ``(status, headers, body)``.

        ``headers`` is a list of ``(name, value)`` pairs. An upstream error
        status (4xx/5xx) is returned like any other response; every other
        failure propagates to the caller as an exception.
        """
        req = urllib.request.Request(url)
        # Copy headers except Host (urllib adds Host automatically) and
        # hop-by-hop headers.
        for key in self.headers:
            if key.lower() in self._UNFORWARDED_REQUEST_HEADERS:
                continue
            req.add_header(key, self.headers[key])

        try:
            with urllib.request.urlopen(req, timeout=self.upstream_timeout) as resp:
                return resp.status, resp.getheaders(), resp.read()
        except urllib.error.HTTPError as err:
            # urllib raises for 4xx/5xx, but the exception object still
            # carries the upstream response, which the client should see.
            try:
                return err.code, list(err.headers.items()), err.read()
            finally:
                err.close()

    def do_CONNECT(self):
        """Refuse CONNECT: HTTPS tunnelling is not supported by this proxy."""
        self.send_error(501, "Not Implemented: HTTPS not supported by this proxy.")

    def log_message(self, format, *args):
        """Write the access-log line to stdout instead of the default stderr."""
        sys.stdout.write("%s - - [%s] %s\n"
                         % (self.client_address[0],
                            self.log_date_time_string(),
                            format % args))


class _ProxyServer(socketserver.ThreadingTCPServer):
    """Threading TCP server configured for stop/restart cycles of the proxy."""

    # Allow rebinding right after a restart instead of failing with
    # "Address already in use" while old connections linger in TIME_WAIT.
    allow_reuse_address = True
    # Handler threads must not keep the process alive (or block shutdown)
    # after Ctrl-C while a slow upstream request is still in flight.
    daemon_threads = True


def run_server():
    """Run the proxy on ``HOST``:``PORT`` until interrupted with Ctrl-C.

    Each client connection is handled by ``ProxyRequestHandler`` in its own
    thread. The listening socket is closed when the ``with`` block exits.
    """
    print(f"Starting proxy server on {HOST}:{PORT}")
    with _ProxyServer((HOST, PORT), ProxyRequestHandler) as httpd:
        print("Proxy server running. Configure your browser to use this proxy.")
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\nProxy server stopped.")


# Start the proxy only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    run_server()