Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ Unreleased
Added
#####

- Add ``TurnstileTaskProxyless`` and ``TurnstileTask`` for solving Cloudflare Turnstile captchas (with optional ``action``, ``cdata``, and ``chl_page_data`` parameters)
- Add ``AsyncAnticaptchaClient`` and ``AsyncJob`` for async/await usage with ``httpx`` (``pip install python-anticaptcha[async]``)
- Rename ``base.py`` → ``sync_client.py`` for symmetry with ``async_client.py``; backward-compatible ``base.py`` shim preserved
- Rename sync example files with ``sync_`` prefix to match ``async_`` examples
Expand Down
26 changes: 26 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,32 @@ job.join()
print(job.get_captcha_text())
```

### Solve Cloudflare Turnstile

Example snippet for Cloudflare Turnstile captcha:

```python
from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless

# Example values only (presumably placeholders) - use your own API key,
# the sitekey found on the target page, and the target page URL.
api_key = '174faff8fbc769e94a5862391ecfd010'
site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site
url = 'https://example.com'

client = AnticaptchaClient(api_key)
task = TurnstileTaskProxyless(url, site_key)
# Create the task, wait on the job with join(), then read the token.
job = client.create_task(task)
job.join()
print(job.get_token_response())
```

The full integration example is available in the file `examples/sync_turnstile_request.py`.

To pass the optional Turnstile parameters (`action`, `cdata`, `chl_page_data`) as keyword arguments:

```python
task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token")
```

### Solve funcaptcha

Example snippet for funcaptcha:
Expand Down
27 changes: 27 additions & 0 deletions docs/usage.rst
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,33 @@ Example snippet for text captcha:
job.join()
print(job.get_captcha_text())

Solve Cloudflare Turnstile
##########################

Example snippet for Cloudflare Turnstile captcha:

.. code:: python

from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless

# Example values only (presumably placeholders) - use your own API key,
# the sitekey found on the target page, and the target page URL.
api_key = '174faff8fbc769e94a5862391ecfd010'
site_key = '0x4AAAAAAABS7vwvV6VFfMcD' # grab from site
url = 'https://example.com'

client = AnticaptchaClient(api_key)
task = TurnstileTaskProxyless(url, site_key)
# Create the task, wait on the job with join(), then read the token.
job = client.create_task(task)
job.join()
print(job.get_token_response())

The full integration example is available in the file ``examples/sync_turnstile_request.py``.

To pass the optional Turnstile parameters (``action``, ``cdata``, ``chl_page_data``) as keyword arguments:

.. code:: python

task = TurnstileTaskProxyless(url, site_key, action="managed", cdata="token")

Solve funcaptcha
################

Expand Down
38 changes: 38 additions & 0 deletions examples/async_turnstile_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import asyncio
import re
from os import environ

import httpx

from python_anticaptcha import AsyncAnticaptchaClient, TurnstileTaskProxyless

# API key for the Anti-Captcha client, read from the KEY environment variable
# (a missing variable raises KeyError at import time).
api_key = environ["KEY"]
# Regex whose single group captures the value of a data-sitekey="..." attribute.
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com" # replace with target URL


async def get_form_html(session: httpx.AsyncClient) -> str:
    """Fetch ``url`` with *session* and return the response body as text."""
    response = await session.get(url)
    return response.text


async def get_token(client: AsyncAnticaptchaClient, form_html: str) -> str:
    """Extract the Turnstile sitekey from *form_html* and obtain a solved token.

    :param client: Anti-Captcha client used to create the task.
    :param form_html: HTML of the page that embeds the Turnstile widget.
    :returns: The value of ``job.get_token_response()`` for the finished job.
    :raises ValueError: If no ``data-sitekey`` attribute is present in *form_html*.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise ValueError(f"No Turnstile sitekey (data-sitekey attribute) found at {url}")
    task = TurnstileTaskProxyless(website_url=url, website_key=match.group(1))
    job = await client.create_task(task)
    await job.join()
    return job.get_token_response()


async def form_submit(session: httpx.AsyncClient, token: str) -> str:
    """POST *token* as ``cf-turnstile-response`` to ``url`` and return the body text."""
    payload = {"cf-turnstile-response": token}
    response = await session.post(url, data=payload)
    return response.text


async def process():
    """Run the example end to end: fetch the form, get a token, submit it.

    Both the Anti-Captcha client and the HTTP session are opened as async
    context managers for the duration of the flow.
    """
    async with AsyncAnticaptchaClient(api_key) as client:
        async with httpx.AsyncClient() as session:
            form_html = await get_form_html(session)
            solved_token = await get_token(client, form_html)
            return await form_submit(session, solved_token)


if __name__ == "__main__":
    # Script entry point: run the async flow and print what process() returns.
    outcome = asyncio.run(process())
    print(outcome)
38 changes: 38 additions & 0 deletions examples/sync_turnstile_request.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import re
from os import environ

import requests

from python_anticaptcha import AnticaptchaClient, TurnstileTaskProxyless

# API key for the Anti-Captcha client, read from the KEY environment variable
# (a missing variable raises KeyError at import time).
api_key = environ["KEY"]
# Regex whose single group captures the value of a data-sitekey="..." attribute.
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com" # replace with target URL
# Module-level Anti-Captcha client and HTTP session used by the helper functions.
client = AnticaptchaClient(api_key)
session = requests.Session()


def get_form_html():
    """Fetch ``url`` through the module-level session and return the body text."""
    response = session.get(url)
    return response.text


def get_token(form_html):
    """Extract the Turnstile sitekey from *form_html* and obtain a solved token.

    :param form_html: HTML of the page that embeds the Turnstile widget.
    :returns: The value of ``job.get_token_response()`` for the finished job.
    :raises ValueError: If no ``data-sitekey`` attribute is present in *form_html*.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise ValueError(f"No Turnstile sitekey (data-sitekey attribute) found at {url}")
    task = TurnstileTaskProxyless(website_url=url, website_key=match.group(1))
    job = client.create_task(task)
    job.join()
    return job.get_token_response()


def form_submit(token):
    """POST *token* as ``cf-turnstile-response`` to ``url`` and return the body text.

    The request goes through the module-level ``session`` (the same one used to
    fetch the form) rather than a bare ``requests.post``, so any cookies set by
    the initial GET are sent along with the submission.

    :param token: Solved Turnstile token.
    :returns: Text of the response to the form submission.
    """
    response = session.post(url, data={"cf-turnstile-response": token})
    return response.text


def process():
    """Run the example end to end: fetch the form, get a token, submit it."""
    form_html = get_form_html()
    solved_token = get_token(form_html)
    return form_submit(solved_token)


if __name__ == "__main__":
    # Script entry point: run the flow and print what process() returns.
    outcome = process()
    print(outcome)
50 changes: 50 additions & 0 deletions examples/sync_turnstile_request_proxy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import re
from os import environ

import requests

from python_anticaptcha import AnticaptchaClient, Proxy, TurnstileTask

# API key for the Anti-Captcha client, read from the KEY environment variable
# (a missing variable raises KeyError at import time).
api_key = environ["KEY"]
proxy_url = environ["PROXY_URL"] # e.g. socks5://user:password@123.123.123.123:8888
# Regex whose single group captures the value of a data-sitekey="..." attribute.
site_key_pattern = r'data-sitekey="(.+?)"'
url = "https://example.com" # replace with target URL
# Module-level Anti-Captcha client and HTTP session used by the helper functions.
client = AnticaptchaClient(api_key)
session = requests.Session()

# User-Agent string handed to TurnstileTask as ``user_agent``.
UA = (
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 "
"(KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36"
)


def get_form_html():
    """Fetch ``url`` through the module-level session and return the body text."""
    response = session.get(url)
    return response.text


def get_token(form_html):
    """Extract the Turnstile sitekey from *form_html* and solve it through a proxy.

    The proxy settings are parsed from ``proxy_url`` and expanded into the
    task's keyword arguments; ``UA`` is sent as the task's user agent.

    :param form_html: HTML of the page that embeds the Turnstile widget.
    :returns: The value of ``job.get_token_response()`` for the finished job.
    :raises ValueError: If no ``data-sitekey`` attribute is present in *form_html*.
    """
    match = re.search(site_key_pattern, form_html)
    if match is None:
        # Fail with an actionable message instead of an AttributeError on None.
        raise ValueError(f"No Turnstile sitekey (data-sitekey attribute) found at {url}")
    proxy = Proxy.parse_url(proxy_url)
    task = TurnstileTask(
        website_url=url,
        website_key=match.group(1),
        user_agent=UA,
        **proxy.to_kwargs(),
    )
    job = client.create_task(task)
    job.join()
    return job.get_token_response()


def form_submit(token):
    """POST *token* as ``cf-turnstile-response`` to ``url`` and return the body text.

    The request goes through the module-level ``session`` (the same one used to
    fetch the form) rather than a bare ``requests.post``, so any cookies set by
    the initial GET are sent along with the submission.

    :param token: Solved Turnstile token.
    :returns: Text of the response to the form submission.
    """
    response = session.post(url, data={"cf-turnstile-response": token})
    return response.text


def process():
    """Run the example end to end: fetch the form, get a token, submit it."""
    form_html = get_form_html()
    solved_token = get_token(form_html)
    return form_submit(solved_token)


if __name__ == "__main__":
    # Script entry point: run the flow and print what process() returns.
    outcome = process()
    print(outcome)
4 changes: 4 additions & 0 deletions python_anticaptcha/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@
RecaptchaV2Task,
RecaptchaV2TaskProxyless,
RecaptchaV3TaskProxyless,
TurnstileTask,
TurnstileTaskProxyless,
)

AnticatpchaException = AnticaptchaException
Expand Down Expand Up @@ -79,6 +81,8 @@ def __getattr__(name: str) -> type:
"GeeTestTask",
"AntiGateTaskProxyless",
"AntiGateTask",
"TurnstileTaskProxyless",
"TurnstileTask",
"AnticaptchaException",
"AnticatpchaException",
"AsyncAnticaptchaClient",
Expand Down
79 changes: 79 additions & 0 deletions python_anticaptcha/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,6 +599,85 @@ def serialize(self, **result: Any) -> dict[str, Any]:
return data


class TurnstileTaskProxyless(BaseTask):
    """Cloudflare Turnstile task solved without a proxy.

    Turnstile is Cloudflare's CAPTCHA replacement used on millions of websites.
    All Turnstile subtypes (manual, non-interactive, invisible) are detected
    automatically by the service.

    Once the job has completed, read the token with
    :meth:`Job.get_token_response`.

    :param website_url: Full URL of the page where the Turnstile widget appears.
    :param website_key: The Turnstile ``sitekey`` from the page source.
    :param action: Optional action parameter passed to the Turnstile widget.
    :param cdata: Optional ``cData`` token for Cloudflare-protected pages.
    :param chl_page_data: Optional ``chlPageData`` token for Cloudflare pages.

    Example::

        task = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="0x4AAAAAAABS7vwvV6VFfMcD",
        )
    """

    type = "TurnstileTaskProxyless"
    websiteURL = None
    websiteKey = None

    def __init__(
        self,
        website_url: str,
        website_key: str,
        action: str | None = None,
        cdata: str | None = None,
        chl_page_data: str | None = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        # Store this task's own fields first, then hand the remaining
        # arguments to the next class in the MRO.
        self.websiteURL, self.websiteKey = website_url, website_key
        self.action, self.cData, self.chlPageData = action, cdata, chl_page_data
        super().__init__(*args, **kwargs)

    def serialize(self, **result: Any) -> dict[str, Any]:
        """Return the parent payload extended with the Turnstile fields.

        ``websiteURL`` and ``websiteKey`` are always written; ``action``,
        ``cData`` and ``chlPageData`` are written only when not ``None``.
        """
        payload = super().serialize(**result)
        payload["websiteURL"] = self.websiteURL
        payload["websiteKey"] = self.websiteKey
        optional_fields = (
            ("action", self.action),
            ("cData", self.cData),
            ("chlPageData", self.chlPageData),
        )
        for key, value in optional_fields:
            if value is not None:
                payload[key] = value
        return payload


class TurnstileTask(ProxyMixin, UserAgentMixin, CookieMixin, TurnstileTaskProxyless):
    """Cloudflare Turnstile task solved through a proxy.

    Behaves like :class:`TurnstileTaskProxyless`, with the proxy, user-agent
    and (optional) cookie parameters added on top.

    The proxy-based approach is slower and requires high-quality,
    self-hosted proxies.

    :param user_agent: Browser User-Agent string.
    :param cookies: Optional cookie string (default: ``""``).
    :param proxy_type: Proxy protocol (``"http"``, ``"socks4"``, ``"socks5"``).
    :param proxy_address: Proxy server hostname or IP.
    :param proxy_port: Proxy server port.
    :param proxy_login: Proxy username (empty string if none).
    :param proxy_password: Proxy password (empty string if none).
    """

    # Only the task type differs; behaviour comes from the mixins and the
    # proxyless base class.
    type = "TurnstileTask"


class AntiGateTask(ProxyMixin, AntiGateTaskProxyless):
"""Solve a custom AntiGate task through a proxy.

Expand Down
59 changes: 59 additions & 0 deletions tests/test_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
RecaptchaV2Task,
RecaptchaV2TaskProxyless,
RecaptchaV3TaskProxyless,
TurnstileTask,
TurnstileTaskProxyless,
)

PROXY_KWARGS = dict(
Expand Down Expand Up @@ -353,6 +355,63 @@ def test_proxy_login_omitted_when_falsy(self):
assert data["proxyPort"] == 8080


class TestTurnstileTaskProxyless:
    """Serialization checks for :class:`TurnstileTaskProxyless`."""

    def test_required_fields(self):
        """Type, URL and site key are present in the serialized payload."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com", website_key="tkey"
        ).serialize()
        expected = {
            "type": "TurnstileTaskProxyless",
            "websiteURL": "https://example.com",
            "websiteKey": "tkey",
        }
        for field, value in expected.items():
            assert payload[field] == value

    def test_optional_fields_omitted(self):
        """Optional fields left unset do not appear in the payload."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com", website_key="tkey"
        ).serialize()
        for field in ("action", "cData", "chlPageData"):
            assert field not in payload

    def test_optional_fields_included(self):
        """Optional fields that are supplied are serialized under their API names."""
        payload = TurnstileTaskProxyless(
            website_url="https://example.com",
            website_key="tkey",
            action="managed",
            cdata="cdata_token",
            chl_page_data="chl_token",
        ).serialize()
        expected = {
            "action": "managed",
            "cData": "cdata_token",
            "chlPageData": "chl_token",
        }
        for field, value in expected.items():
            assert payload[field] == value


class TestTurnstileTask:
    """Serialization checks for the proxy-based :class:`TurnstileTask`."""

    def test_type_and_proxy(self):
        """Task type, proxy fields and user agent are all serialized."""
        payload = TurnstileTask(
            website_url="https://example.com",
            website_key="tkey",
            **USER_AGENT_KWARGS,
            **PROXY_KWARGS,
        ).serialize()
        expected = {
            "type": "TurnstileTask",
            "proxyType": "http",
            "proxyAddress": "1.2.3.4",
            "proxyPort": 8080,
            "userAgent": "Mozilla/5.0",
        }
        for field, value in expected.items():
            assert payload[field] == value

    def test_optional_fields_with_proxy(self):
        """Optional Turnstile fields are still serialized when a proxy is used."""
        payload = TurnstileTask(
            website_url="https://example.com",
            website_key="tkey",
            action="managed",
            **USER_AGENT_KWARGS,
            **PROXY_KWARGS,
        ).serialize()
        assert payload["type"] == "TurnstileTask"
        assert payload["action"] == "managed"


class TestCookieMixin:
def test_cookies_omitted_when_empty(self):
task = NoCaptchaTask(
Expand Down
Loading