From be25c71ea7e711b2d810e35e5f1cebbeda477e91 Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 8 Mar 2026 10:47:51 +0000 Subject: [PATCH] Add Cloudflare Turnstile captcha support Add TurnstileTaskProxyless and TurnstileTask classes for solving Cloudflare Turnstile challenges, with optional action, cData, and chlPageData parameters. Includes sync/async examples, unit tests, and documentation updates. https://claude.ai/code/session_01KKZjR7gkNfk4vUhJb1eJr6 --- CHANGELOG.rst | 1 + README.md | 26 ++++++++ docs/usage.rst | 27 ++++++++ examples/async_turnstile_request.py | 38 ++++++++++++ examples/sync_turnstile_request.py | 38 ++++++++++++ examples/sync_turnstile_request_proxy.py | 50 +++++++++++++++ python_anticaptcha/__init__.py | 4 ++ python_anticaptcha/tasks.py | 79 ++++++++++++++++++++++++ tests/test_tasks.py | 59 ++++++++++++++++++ 9 files changed, 322 insertions(+) create mode 100644 examples/async_turnstile_request.py create mode 100644 examples/sync_turnstile_request.py create mode 100644 examples/sync_turnstile_request_proxy.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 9d7e50f..0c97a00 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -7,6 +7,7 @@ Unreleased Added ##### +- Add ``TurnstileTaskProxyless`` and ``TurnstileTask`` for solving Cloudflare Turnstile captchas (with optional ``action``, ``cdata``, and ``chl_page_data`` parameters) - Add ``AsyncAnticaptchaClient`` and ``AsyncJob`` for async/await usage with ``httpx`` (``pip install python-anticaptcha[async]``) - Rename ``base.py`` → ``sync_client.py`` for symmetry with ``async_client.py``; backward-compatible ``base.py`` shim preserved - Rename sync example files with ``sync_`` prefix to match ``async_`` examples diff --git a/README.md b/README.md index e1efe2d..da996c6 100644 --- a/README.md +++ b/README.md @@ -134,6 +134,32 @@ job.join() print(job.get_captcha_text()) ``` +### Solve Cloudflare Turnstile + +Example snippet for Cloudflare Turnstile captcha: + +```python +from python_anticaptcha 
import AnticaptchaClient, TurnstileTaskProxyless + +api_key = '174faff8fbc769e94a5862391ecfd010' +site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site +url = 'https://example.com' + +client = AnticaptchaClient(api_key) +task = TurnstileTaskProxyless(url, site_key) +job = client.create_task(task) +job.join() +print(job.get_token_response()) +``` + +The full integration example is available in file `examples/sync_turnstile_request.py`. + +For Turnstile with optional parameters (action, cData): + +```python +task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token") +``` + ### Solve funcaptcha Example snippet for funcaptcha: diff --git a/docs/usage.rst b/docs/usage.rst index ecc3edb..d57a5b7 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -116,6 +116,33 @@ Example snippet for text captcha: job.join() print(job.get_captcha_text()) +Solve Cloudflare Turnstile +########################## + +Example snippet for Cloudflare Turnstile captcha: + +.. code:: python + + from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless + + api_key = '174faff8fbc769e94a5862391ecfd010' + site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site + url = 'https://example.com' + + client = AnticaptchaClient(api_key) + task = TurnstileTaskProxyless(url, site_key) + job = client.create_task(task) + job.join() + print(job.get_token_response()) + +The full integration example is available in file ``examples/sync_turnstile_request.py``. + +For Turnstile with optional parameters (action, cData): + +.. 
code:: python + + task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token") + Solve funcaptcha ################ diff --git a/examples/async_turnstile_request.py b/examples/async_turnstile_request.py new file mode 100644 index 0000000..3b9e308 --- /dev/null +++ b/examples/async_turnstile_request.py @@ -0,0 +1,38 @@ +import asyncio +import re +from os import environ + +import httpx + +from python_anticaptcha import AsyncAnticaptchaClient, TurnstileTaskProxyless + +api_key = environ["KEY"] +site_key_pattern = r'data-sitekey="(.+?)"' +url = "https://example.com" # replace with target URL + + +async def get_form_html(session: httpx.AsyncClient) -> str: + return (await session.get(url)).text + + +async def get_token(client: AsyncAnticaptchaClient, form_html: str) -> str: + site_key = re.search(site_key_pattern, form_html).group(1) + task = TurnstileTaskProxyless(website_url=url, website_key=site_key) + job = await client.create_task(task) + await job.join() + return job.get_token_response() + + +async def form_submit(session: httpx.AsyncClient, token: str) -> str: + return (await session.post(url, data={"cf-turnstile-response": token})).text + + +async def process(): + async with AsyncAnticaptchaClient(api_key) as client, httpx.AsyncClient() as session: + html = await get_form_html(session) + token = await get_token(client, html) + return await form_submit(session, token) + + +if __name__ == "__main__": + print(asyncio.run(process())) diff --git a/examples/sync_turnstile_request.py b/examples/sync_turnstile_request.py new file mode 100644 index 0000000..95f9295 --- /dev/null +++ b/examples/sync_turnstile_request.py @@ -0,0 +1,38 @@ +import re +from os import environ + +import requests + +from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless + +api_key = environ["KEY"] +site_key_pattern = r'data-sitekey="(.+?)"' +url = "https://example.com" # replace with target URL +client = AnticaptchaClient(api_key) +session = requests.Session() + 
+ +def get_form_html(): + return session.get(url).text + + +def get_token(form_html): + site_key = re.search(site_key_pattern, form_html).group(1) + task = TurnstileTaskProxyless(website_url=url, website_key=site_key) + job = client.create_task(task) + job.join() + return job.get_token_response() + + +def form_submit(token): + return session.post(url, data={"cf-turnstile-response": token}).text + + +def process(): + html = get_form_html() + token = get_token(html) + return form_submit(token) + + +if __name__ == "__main__": + print(process()) diff --git a/examples/sync_turnstile_request_proxy.py b/examples/sync_turnstile_request_proxy.py new file mode 100644 index 0000000..972d050 --- /dev/null +++ b/examples/sync_turnstile_request_proxy.py @@ -0,0 +1,50 @@ +import re +from os import environ + +import requests + +from python_anticaptcha import AnticaptchaClient, Proxy, TurnstileTask + +api_key = environ["KEY"] +proxy_url = environ["PROXY_URL"] # e.g. socks5://user:password@123.123.123.123:8888 +site_key_pattern = r'data-sitekey="(.+?)"' +url = "https://example.com" # replace with target URL +client = AnticaptchaClient(api_key) +session = requests.Session() + +UA = ( + "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36" +) + + +def get_form_html(): + return session.get(url).text + + +def get_token(form_html): + site_key = re.search(site_key_pattern, form_html).group(1) + proxy = Proxy.parse_url(proxy_url) + task = TurnstileTask( + website_url=url, + website_key=site_key, + user_agent=UA, + **proxy.to_kwargs(), + ) + job = client.create_task(task) + job.join() + return job.get_token_response() + + +def form_submit(token): + return session.post(url, data={"cf-turnstile-response": token}).text + + +def process(): + html = get_form_html() + token = get_token(html) + return form_submit(token) + + +if __name__ == "__main__": + print(process()) diff --git a/python_anticaptcha/__init__.py 
b/python_anticaptcha/__init__.py index db879a6..ac843ed 100644 --- a/python_anticaptcha/__init__.py +++ b/python_anticaptcha/__init__.py @@ -41,6 +41,8 @@ RecaptchaV2Task, RecaptchaV2TaskProxyless, RecaptchaV3TaskProxyless, + TurnstileTask, + TurnstileTaskProxyless, ) AnticatpchaException = AnticaptchaException @@ -79,6 +81,8 @@ def __getattr__(name: str) -> type: "GeeTestTask", "AntiGateTaskProxyless", "AntiGateTask", + "TurnstileTaskProxyless", + "TurnstileTask", "AnticaptchaException", "AnticatpchaException", "AsyncAnticaptchaClient", diff --git a/python_anticaptcha/tasks.py b/python_anticaptcha/tasks.py index 9b01fc0..5f6f6b2 100644 --- a/python_anticaptcha/tasks.py +++ b/python_anticaptcha/tasks.py @@ -599,6 +599,85 @@ def serialize(self, **result: Any) -> dict[str, Any]: return data +class TurnstileTaskProxyless(BaseTask): + """Solve a Cloudflare Turnstile challenge without a proxy. + + Turnstile is Cloudflare's CAPTCHA replacement used on millions of websites. + The service automatically detects all Turnstile subtypes (manual, + non-interactive, invisible). + + After the job completes, retrieve the token with + :meth:`Job.get_token_response`. + + :param website_url: Full URL of the page where the Turnstile widget appears. + :param website_key: The Turnstile ``sitekey`` from the page source. + :param action: Optional action parameter passed to the Turnstile widget. + :param cdata: Optional ``cData`` token for Cloudflare-protected pages. + :param chl_page_data: Optional ``chlPageData`` token for Cloudflare pages. 
+ + Example:: + + task = TurnstileTaskProxyless( + website_url="https://example.com", + website_key="0x4AAAAAAABS7vwvV6VFfMcD", + ) + """ + + type = "TurnstileTaskProxyless" + websiteURL = None + websiteKey = None + + def __init__( + self, + website_url: str, + website_key: str, + action: str | None = None, + cdata: str | None = None, + chl_page_data: str | None = None, + *args: Any, + **kwargs: Any, + ) -> None: + self.websiteURL = website_url + self.websiteKey = website_key + self.action = action + self.cData = cdata + self.chlPageData = chl_page_data + super().__init__(*args, **kwargs) + + def serialize(self, **result: Any) -> dict[str, Any]: + data = super().serialize(**result) + data["websiteURL"] = self.websiteURL + data["websiteKey"] = self.websiteKey + if self.action is not None: + data["action"] = self.action + if self.cData is not None: + data["cData"] = self.cData + if self.chlPageData is not None: + data["chlPageData"] = self.chlPageData + return data + + +class TurnstileTask(ProxyMixin, UserAgentMixin, CookieMixin, TurnstileTaskProxyless): + """Solve a Cloudflare Turnstile challenge through a proxy. + + Same as :class:`TurnstileTaskProxyless` but additionally requires + proxy and user-agent parameters and accepts an optional cookie string. + + Note that the proxy-based approach is slower and requires high-quality, + self-hosted proxies. + + :param user_agent: Browser User-Agent string. + :param cookies: Optional cookie string (default: ``""``). + :param proxy_type: Proxy protocol (``"http"``, ``"socks4"``, ``"socks5"``). + :param proxy_address: Proxy server hostname or IP. + :param proxy_port: Proxy server port. + :param proxy_login: Proxy username (empty string if none). + :param proxy_password: Proxy password (empty string if none). + """ + + type = "TurnstileTask" + + class AntiGateTask(ProxyMixin, AntiGateTaskProxyless): """Solve a custom AntiGate task through a proxy. 
diff --git a/tests/test_tasks.py b/tests/test_tasks.py index 046400c..32ac592 100644 --- a/tests/test_tasks.py +++ b/tests/test_tasks.py @@ -17,6 +17,8 @@ RecaptchaV2Task, RecaptchaV2TaskProxyless, RecaptchaV3TaskProxyless, + TurnstileTask, + TurnstileTaskProxyless, ) PROXY_KWARGS = dict( @@ -353,6 +355,63 @@ def test_proxy_login_omitted_when_falsy(self): assert data["proxyPort"] == 8080 +class TestTurnstileTaskProxyless: + def test_required_fields(self): + task = TurnstileTaskProxyless(website_url="https://example.com", website_key="tkey") + data = task.serialize() + assert data["type"] == "TurnstileTaskProxyless" + assert data["websiteURL"] == "https://example.com" + assert data["websiteKey"] == "tkey" + + def test_optional_fields_omitted(self): + task = TurnstileTaskProxyless(website_url="https://example.com", website_key="tkey") + data = task.serialize() + assert "action" not in data + assert "cData" not in data + assert "chlPageData" not in data + + def test_optional_fields_included(self): + task = TurnstileTaskProxyless( + website_url="https://example.com", + website_key="tkey", + action="managed", + cdata="cdata_token", + chl_page_data="chl_token", + ) + data = task.serialize() + assert data["action"] == "managed" + assert data["cData"] == "cdata_token" + assert data["chlPageData"] == "chl_token" + + +class TestTurnstileTask: + def test_type_and_proxy(self): + task = TurnstileTask( + website_url="https://example.com", + website_key="tkey", + **USER_AGENT_KWARGS, + **PROXY_KWARGS, + ) + data = task.serialize() + assert data["type"] == "TurnstileTask" + assert data["proxyType"] == "http" + assert data["proxyAddress"] == "1.2.3.4" + assert data["proxyPort"] == 8080 + assert data["userAgent"] == "Mozilla/5.0" + + def test_optional_fields_with_proxy(self): + task = TurnstileTask( + website_url="https://example.com", + website_key="tkey", + action="managed", + **USER_AGENT_KWARGS, + **PROXY_KWARGS, + ) + data = task.serialize() + assert data["type"] == 
"TurnstileTask" + assert data["action"] == "managed" + + class TestCookieMixin: def test_cookies_omitted_when_empty(self): task = NoCaptchaTask(