From 5174648b58282d012c2ad75c20c817a218d0b9d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 18:02:40 +0200 Subject: [PATCH 01/12] fix: adapt SDK ProxyConfiguration to crawlee v4 API Crawlee v4 reshaped `ProxyConfiguration`: - `newProxyInfo` and `newUrl` now take a single `TieredProxyOptions` argument; the previous `(sessionId, options)` pair is gone. - The protected `_handleCustomUrl(sessionId)` helper was removed; the `_callNewUrlFunction` and `_handleTieredUrl` helpers now take options only. - `ProxyInfo` (in `@crawlee/types`) no longer carries `sessionId`. Changes: - `newProxyInfo` and `newUrl` accept `string | number | TieredProxyOptions | undefined` so existing SDK callers that pass a raw `sessionId` keep working, while the override remains compatible with crawlee's v4 signature. A small `parseSessionIdOrOptions` helper discriminates and pulls `sessionId` from `options.request` when no explicit one is given. - Inlined custom-URL session stickiness via a new private `getSessionIndex(sessionId)` (replacing the removed `_handleCustomUrl`), keyed on `usedProxyUrls` like the base class. - Re-declared `sessionId?: string` on the SDK's `ProxyInfo` interface so users can still read `proxyInfo.sessionId` (v3 carried it on the base type). - Re-imported `ProxyInfo` from `@crawlee/types` (no longer re-exported from `@crawlee/core`). - Tightened a `proxyUrls.some(url => url.includes(...))` access for the new `(string | null)[]` array shape. Stacked on #583 (config redesign); rebases onto v4 once that lands. --- packages/apify/src/proxy_configuration.ts | 98 +++++++++++++++++------ 1 file changed, 75 insertions(+), 23 deletions(-) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index 231cfa6db0..badef05da8 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -1,14 +1,10 @@ -import type { - ProxyConfigurationOptions as CoreProxyConfigurationOptions, - ProxyInfo as CoreProxyInfo, -} from '@crawlee/core'; +import type { ProxyConfigurationOptions as CoreProxyConfigurationOptions } from '@crawlee/core'; import { ProxyConfiguration as CoreProxyConfiguration } from '@crawlee/core'; +import type { ProxyInfo as CoreProxyInfo } from '@crawlee/types'; import { gotScraping } from 'got-scraping'; import ow from 'ow'; import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts'; -import { cryptoRandomObjectId } from '@apify/utilities'; - import { Actor } from './actor.js'; import { Configuration } from './configuration.js'; @@ -18,6 +14,22 @@ const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4_000; const CHECK_ACCESS_MAX_ATTEMPTS = 2; const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/; +type CoreProxyOptions = Parameters[0]; + +/** + * Bridges the SDK's legacy `(sessionId)` calling style with crawlee v4's + * `(options)` shape — pulls `sessionId` from a `Request` carried in `options` + * when no explicit `sessionId` is given. + */ +function parseSessionIdOrOptions( + arg: string | number | CoreProxyOptions | undefined, +): { sessionId: string | undefined; options: CoreProxyOptions } { + if (typeof arg === 'string' || typeof arg === 'number') { + return { sessionId: String(arg), options: undefined }; + } + return { sessionId: arg?.request?.sessionId, options: arg }; +} + export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions { /** @@ -100,6 +112,13 @@ export interface ProxyConfigurationOptions * ``` */ export interface ProxyInfo extends CoreProxyInfo { + /** + * The Apify Proxy session identifier the URL was minted for, if any. + * v3 carried this on the base `ProxyInfo`; v4 dropped it, so the SDK + * re-declares it here for users that read `proxyInfo.sessionId`. + */ + sessionId?: string; + /** * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy). * If not provided, the proxy will select the groups automatically. @@ -241,7 +260,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration { this.port = port; this.usesApifyProxy = !this.proxyUrls && !this.newUrlFunction; - if (proxyUrls && proxyUrls.some((url) => url.includes('apify.com'))) { + if (proxyUrls && proxyUrls.some((url) => url?.includes('apify.com'))) { this.log.warning( 'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties instead of `proxyUrls`.\n' + 'See https://sdk.apify.com/docs/guides/proxy-management#apify-proxy-configuration', @@ -304,10 +323,18 @@ export class ProxyConfiguration extends CoreProxyConfiguration { * @return Represents information about used proxy and its configuration. */ override async newProxyInfo( - sessionId?: string | number, - options?: Parameters[1], + sessionIdOrOptions?: + | string + | number + | Parameters[0], ): Promise { - if (typeof sessionId === 'number') sessionId = `${sessionId}`; + // crawlee v4 dropped the `(sessionId, options)` overload — `newProxyInfo` + // now takes a single `TieredProxyOptions` argument and pulls `sessionId` + // from `options.request`. Keep the SDK's legacy "pass sessionId directly" + // shape working by discriminating at runtime. + const { sessionId, options } = parseSessionIdOrOptions( + sessionIdOrOptions, + ); ow( sessionId, ow.optional.string @@ -315,15 +342,15 @@ export class ProxyConfiguration extends CoreProxyConfiguration { .matches(APIFY_PROXY_VALUE_REGEX), ); - const proxyInfo = await super.newProxyInfo(sessionId, options); - if (!proxyInfo) return proxyInfo; + const url = await this.newUrl(sessionIdOrOptions); + if (!url) return undefined; const { groups, countryCode, password, port, hostname } = ( - this.usesApifyProxy ? this : new URL(proxyInfo.url) + this.usesApifyProxy ? this : new URL(url) ) as ProxyConfiguration; return { - ...proxyInfo, + url, sessionId, groups, countryCode, @@ -333,6 +360,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration { : decodeURIComponent(password!), hostname, port: port!, + proxyTier: options?.proxyTier, }; } @@ -350,10 +378,14 @@ export class ProxyConfiguration extends CoreProxyConfiguration { * For example, `http://bob:password123@proxy.example.com:8000` */ override async newUrl( - sessionId?: string | number, - options?: Parameters[1], + sessionIdOrOptions?: + | string + | number + | Parameters[0], ): Promise { - if (typeof sessionId === 'number') sessionId = `${sessionId}`; + const { sessionId, options } = parseSessionIdOrOptions( + sessionIdOrOptions, + ); ow( sessionId, ow.optional.string @@ -362,27 +394,47 @@ export class ProxyConfiguration extends CoreProxyConfiguration { ); if (this.newUrlFunction) { return ( - (await this._callNewUrlFunction(sessionId, { + (await this._callNewUrlFunction({ request: options?.request, })) ?? undefined ); } if (this.proxyUrls) { - return this._handleCustomUrl(sessionId); + // `_handleCustomUrl` was removed from `CoreProxyConfiguration` in + // v4; inline the rotation logic to preserve session-stickiness. + const index = + sessionId !== undefined + ? this.getSessionIndex(sessionId) + : (this.nextCustomUrlIndex += 1) % this.proxyUrls.length; + return this.proxyUrls[index] ?? undefined; } if (this.tieredProxyUrls) { return ( - this._handleTieredUrl( - sessionId ?? cryptoRandomObjectId(6), - options, - ).proxyUrl ?? undefined + this._handleTieredUrl(options ?? {}).proxyUrl ?? undefined ); } return this.composeDefaultUrl(sessionId); } + /** + * Stable per-session index into `proxyUrls`, replacing the removed + * `_handleCustomUrl(sessionId)` from crawlee v3. + */ + private getSessionIndex(sessionId: string): number { + if (!this.usedProxyUrls.has(sessionId)) { + this.usedProxyUrls.set( + sessionId, + this.proxyUrls![ + this.usedProxyUrls.size % this.proxyUrls!.length + ], + ); + } + return this.proxyUrls!.indexOf(this.usedProxyUrls.get(sessionId)!); + } + + protected _generateTieredProxyUrls( tieredProxyConfig: NonNullable< ProxyConfigurationOptions['tieredProxyConfig'] From dd7f72a41d1aa829caf1696a5e3824fcce7d253c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 18:34:59 +0200 Subject: [PATCH 02/12] chore: fix import sort in proxy_configuration.ts --- packages/apify/src/proxy_configuration.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index badef05da8..8a66c3ec4e 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -5,6 +5,7 @@ import { gotScraping } from 'got-scraping'; import ow from 'ow'; import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts'; + import { Actor } from './actor.js'; import { Configuration } from './configuration.js'; From bcc9b967fd2259764667e1d357821b3090f41a01 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:12:04 +0200 Subject: [PATCH 03/12] chore: prettier --- packages/apify/src/proxy_configuration.ts | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index 8a66c3ec4e..c386764476 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -333,9 +333,8 @@ export class ProxyConfiguration extends CoreProxyConfiguration { // now takes a single `TieredProxyOptions` argument and pulls `sessionId` // from `options.request`. Keep the SDK's legacy "pass sessionId directly" // shape working by discriminating at runtime. - const { sessionId, options } = parseSessionIdOrOptions( - sessionIdOrOptions, - ); + const { sessionId, options } = + parseSessionIdOrOptions(sessionIdOrOptions); ow( sessionId, ow.optional.string @@ -384,9 +383,8 @@ export class ProxyConfiguration extends CoreProxyConfiguration { | number | Parameters[0], ): Promise { - const { sessionId, options } = parseSessionIdOrOptions( - sessionIdOrOptions, - ); + const { sessionId, options } = + parseSessionIdOrOptions(sessionIdOrOptions); ow( sessionId, ow.optional.string @@ -411,9 +409,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration { } if (this.tieredProxyUrls) { - return ( - this._handleTieredUrl(options ?? {}).proxyUrl ?? undefined - ); + return this._handleTieredUrl(options ?? {}).proxyUrl ?? undefined; } return this.composeDefaultUrl(sessionId); @@ -435,7 +431,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { return this.proxyUrls!.indexOf(this.usedProxyUrls.get(sessionId)!); } - protected _generateTieredProxyUrls( tieredProxyConfig: NonNullable< ProxyConfigurationOptions['tieredProxyConfig'] From a2141574b937d10a04bd0e42dc2664cca5ab133e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:22:16 +0200 Subject: [PATCH 04/12] fix(proxy): preserve v3 rotation/username/validation semantics MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Custom URL rotation: post-increment the round-robin index so the first sessionless call returns proxyUrls[0] (was off-by-one). - Surface `username` on the returned ProxyInfo by parsing it out of the resolved URL — v3 carried it via `super.newProxyInfo`. - parseSessionIdOrOptions now rejects non-plain objects (e.g. Date, Array) so `newUrl(new Date())` throws as users expect. test: `newUrl({})` is no longer 'invalid' — empty TieredProxyOptions is a legal v4 call shape; documented the carve-out. --- packages/apify/src/proxy_configuration.ts | 31 ++++++++++++++++++++--- test/apify/proxy_configuration.test.ts | 6 +++-- 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index c386764476..dba7b46519 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -20,15 +20,29 @@ type CoreProxyOptions = Parameters[0]; /** * Bridges the SDK's legacy `(sessionId)` calling style with crawlee v4's * `(options)` shape — pulls `sessionId` from a `Request` carried in `options` - * when no explicit `sessionId` is given. + * when no explicit `sessionId` is given. Rejects values that are neither a + * sessionId nor a plain options object (e.g. `Date`, arrays). */ function parseSessionIdOrOptions( arg: string | number | CoreProxyOptions | undefined, ): { sessionId: string | undefined; options: CoreProxyOptions } { + if (arg === undefined) { + return { sessionId: undefined, options: undefined }; + } if (typeof arg === 'string' || typeof arg === 'number') { return { sessionId: String(arg), options: undefined }; } - return { sessionId: arg?.request?.sessionId, options: arg }; + if ( + typeof arg !== 'object' || + arg === null || + Array.isArray(arg) || + Object.getPrototypeOf(arg) !== Object.prototype + ) { + throw new TypeError( + 'Expected sessionId (string/number) or a TieredProxyOptions object', + ); + } + return { sessionId: arg.request?.sessionId, options: arg }; } export interface ProxyConfigurationOptions @@ -349,6 +363,13 @@ export class ProxyConfiguration extends CoreProxyConfiguration { this.usesApifyProxy ? this : new URL(url) ) as ProxyConfiguration; + // Extract `username` from the resolved URL — crawlee v3 carried it + // on `ProxyInfo` and tests rely on it (e.g. for Apify Proxy session + // formatting). v4's `super.newProxyInfo` would surface this, but we + // bypass `super` here so the SDK can keep its legacy `sessionId` + // calling convention. + const username = new URL(url).username || undefined; + return { url, sessionId, @@ -360,6 +381,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration { : decodeURIComponent(password!), hostname, port: port!, + username, proxyTier: options?.proxyTier, }; } @@ -401,10 +423,13 @@ export class ProxyConfiguration extends CoreProxyConfiguration { if (this.proxyUrls) { // `_handleCustomUrl` was removed from `CoreProxyConfiguration` in // v4; inline the rotation logic to preserve session-stickiness. + // Round-robin index for sessionless calls (post-increment so the + // first call returns proxyUrls[0]); per-session sticky mapping + // when a sessionId is provided. const index = sessionId !== undefined ? this.getSessionIndex(sessionId) - : (this.nextCustomUrlIndex += 1) % this.proxyUrls.length; + : this.nextCustomUrlIndex++ % this.proxyUrls.length; return this.proxyUrls[index] ?? undefined; } diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index 8c61a63177..16fc946810 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -175,8 +175,10 @@ describe('ProxyConfiguration', () => { proxyConfiguration.newUrl('a-b'), ).rejects.toThrow(), expect(proxyConfiguration.newUrl('a$b')).rejects.toThrow(), - // @ts-expect-error invalid input - expect(proxyConfiguration.newUrl({})).rejects.toThrow(), + // crawlee v4 made `newUrl` accept `TieredProxyOptions`, so + // an empty object is a valid (sessionless) call now. We only + // reject inputs that are neither a sessionId nor a plain + // options object. // @ts-expect-error invalid input expect(proxyConfiguration.newUrl(new Date())).rejects.toThrow(), expect( From 0c363041d75406b641ba2b9246e091f28b0e2868 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:28:48 +0200 Subject: [PATCH 05/12] fix(proxy): support legacy (sessionId, options) two-arg form; trim ProxyInfo shape MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - newUrl/newProxyInfo accept an optional second `legacyOptions` argument so existing callers that pass `(sessionId, {request})` keep working under the v4 shape too. - Returned ProxyInfo omits Apify-only fields (groups, countryCode) when not using Apify Proxy and only includes `proxyTier` when defined — matches v3's strict-deep-equal expectations. --- packages/apify/src/proxy_configuration.ts | 46 +++++++++++++++-------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index dba7b46519..e4078345c9 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -18,19 +18,21 @@ const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/; type CoreProxyOptions = Parameters[0]; /** - * Bridges the SDK's legacy `(sessionId)` calling style with crawlee v4's - * `(options)` shape — pulls `sessionId` from a `Request` carried in `options` - * when no explicit `sessionId` is given. Rejects values that are neither a - * sessionId nor a plain options object (e.g. `Date`, arrays). + * Bridges the SDK's legacy `(sessionId, options?)` calling style with + * crawlee v4's `(options)` shape — pulls `sessionId` from a `Request` + * carried in `options` when no explicit `sessionId` is given. Rejects + * values that are neither a sessionId nor a plain options object + * (e.g. `Date`, arrays). */ function parseSessionIdOrOptions( arg: string | number | CoreProxyOptions | undefined, + legacyOptions?: CoreProxyOptions, ): { sessionId: string | undefined; options: CoreProxyOptions } { if (arg === undefined) { - return { sessionId: undefined, options: undefined }; + return { sessionId: undefined, options: legacyOptions }; } if (typeof arg === 'string' || typeof arg === 'number') { - return { sessionId: String(arg), options: undefined }; + return { sessionId: String(arg), options: legacyOptions }; } if ( typeof arg !== 'object' || @@ -342,13 +344,16 @@ export class ProxyConfiguration extends CoreProxyConfiguration { | string | number | Parameters[0], + legacyOptions?: Parameters[0], ): Promise { // crawlee v4 dropped the `(sessionId, options)` overload — `newProxyInfo` // now takes a single `TieredProxyOptions` argument and pulls `sessionId` // from `options.request`. Keep the SDK's legacy "pass sessionId directly" // shape working by discriminating at runtime. - const { sessionId, options } = - parseSessionIdOrOptions(sessionIdOrOptions); + const { sessionId, options } = parseSessionIdOrOptions( + sessionIdOrOptions, + legacyOptions, + ); ow( sessionId, ow.optional.string @@ -356,7 +361,7 @@ export class ProxyConfiguration extends CoreProxyConfiguration { .matches(APIFY_PROXY_VALUE_REGEX), ); - const url = await this.newUrl(sessionIdOrOptions); + const url = await this.newUrl(sessionIdOrOptions, legacyOptions); if (!url) return undefined; const { groups, countryCode, password, port, hostname } = ( @@ -370,11 +375,12 @@ export class ProxyConfiguration extends CoreProxyConfiguration { // calling convention. const username = new URL(url).username || undefined; - return { + // Build the result lazily: omit Apify-only fields when the SDK is + // wrapping a custom `proxyUrls` rotation (matches v3 shape, which + // tests rely on with strict deep-equal). + const result: Partial = { url, sessionId, - groups, - countryCode, // this.password is not encoded, but the password from the URL will be, we need to normalize password: this.usesApifyProxy ? (password ?? '') @@ -382,8 +388,15 @@ export class ProxyConfiguration extends CoreProxyConfiguration { hostname, port: port!, username, - proxyTier: options?.proxyTier, }; + if (this.usesApifyProxy) { + result.groups = groups; + if (countryCode !== undefined) result.countryCode = countryCode; + } + if (options?.proxyTier !== undefined) { + result.proxyTier = options.proxyTier; + } + return result as ProxyInfo; } /** @@ -404,9 +417,12 @@ export class ProxyConfiguration extends CoreProxyConfiguration { | string | number | Parameters[0], + legacyOptions?: Parameters[0], ): Promise { - const { sessionId, options } = - parseSessionIdOrOptions(sessionIdOrOptions); + const { sessionId, options } = parseSessionIdOrOptions( + sessionIdOrOptions, + legacyOptions, + ); ow( sessionId, ow.optional.string From b95d7846c8fab8ab0c29c35c873f87ad020751e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:30:52 +0200 Subject: [PATCH 06/12] fix(proxy): decode username; reset cached singletons in createProxyConfiguration tests - ProxyInfo.username is now the decoded form (`user@name` rather than `user%40name`), matching v3 behaviour and the test expectations. - Added a beforeEach to the `Actor.createProxyConfiguration()` describe that resets serviceLocator + Configuration.globalConfig + Actor._instance so each test sees the env vars it sets. --- packages/apify/src/proxy_configuration.ts | 8 ++++++-- test/apify/proxy_configuration.test.ts | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index e4078345c9..7e8182f08d 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -372,8 +372,12 @@ export class ProxyConfiguration extends CoreProxyConfiguration { // on `ProxyInfo` and tests rely on it (e.g. for Apify Proxy session // formatting). v4's `super.newProxyInfo` would surface this, but we // bypass `super` here so the SDK can keep its legacy `sessionId` - // calling convention. - const username = new URL(url).username || undefined; + // calling convention. Decode the URL-encoded username so callers + // see the human-readable form (matches v3 behaviour). + const rawUsername = new URL(url).username; + const username = rawUsername + ? decodeURIComponent(rawUsername) + : undefined; // Build the result lazily: omit Apify-only fields when the SDK is // wrapping a custom `proxyUrls` rotation (matches v3 shape, which diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index 16fc946810..ecee1ca7a5 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -1,10 +1,23 @@ -import { Actor, ProxyConfiguration } from 'apify'; +import { Actor, Configuration, ProxyConfiguration } from 'apify'; import { UserClient } from 'apify-client'; -import { type Dictionary, Request, sleep } from 'crawlee'; +import { type Dictionary, Request, serviceLocator, sleep } from 'crawlee'; import { gotScraping } from 'got-scraping'; import { APIFY_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts'; +// crawlee v4's Configuration resolves env vars eagerly at construction, +// and the SDK keeps `Configuration.globalConfig` plus `Actor._instance` as +// cached singletons. Tests in this file mutate proxy-related env vars at +// runtime, so we have to clear those caches before each test. +function resetGlobalState() { + serviceLocator.reset(); + ( + Configuration as unknown as { globalConfig?: Configuration } + ).globalConfig = undefined; + // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field + (Actor as unknown as { _instance?: Actor })._instance = undefined; +} + const groups = ['GROUP1', 'GROUP2']; const hostname = LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]; const port = Number(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]); @@ -559,6 +572,10 @@ describe('ProxyConfiguration', () => { describe('Actor.createProxyConfiguration()', () => { const userData = { proxy: { password } }; + beforeEach(() => { + resetGlobalState(); + }); + test('should work with all options', async () => { const status = { connected: true }; const proxyUrl = proxyUrlNoSession; From 824cd0d66488ec2ae3aa80730b47739969481d92 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 20:22:12 +0200 Subject: [PATCH 07/12] fix(proxy): drop tieredProxyUrls/tieredProxyConfig support crawlee v4 (apify/crawlee#3599, beta.51) removed `tieredProxyUrls`, `tieredProxyConfig`, `_handleTieredUrl`, and `proxyTier` from `ProxyConfiguration` / `ProxyInfo`. The SDK's wrapper used to thread those through to the base class; with the upstream API gone, that plumbing has to go too. - Remove the `tieredProxyConfig` field from the SDK's `ProxyConfigurationOptions`. - Drop the constructor branch that forwarded `tieredProxyUrls` / `tieredProxyConfig` to the base class and the now-unreachable `_generateTieredProxyUrls` helper. - Drop the `tieredProxyUrls` short-circuit and `proxyTier` field from `newUrl` / `newProxyInfo`. - Drop the corresponding test groups in `proxy_configuration.test.ts`. --- package-lock.json | 69 +++++++++++ packages/apify/src/proxy_configuration.ts | 52 +------- test/apify/proxy_configuration.test.ts | 139 ++-------------------- 3 files changed, 81 insertions(+), 179 deletions(-) diff --git a/package-lock.json b/package-lock.json index 41603ac93a..44cdccceed 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1135,6 +1135,32 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@crawlee/linkedom/node_modules/cheerio": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", + "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", + "dev": true, + "license": "MIT", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "encoding-sniffer": "^0.2.1", + "htmlparser2": "^10.1.0", + "parse5": "^7.3.0", + "parse5-htmlparser2-tree-adapter": "^7.1.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^7.19.0", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=20.18.1" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, "node_modules/@crawlee/linkedom/node_modules/dot-prop": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-8.0.2.tgz", @@ -1151,6 +1177,39 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@crawlee/linkedom/node_modules/entities": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", + "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", + "dev": true, + "license": "BSD-2-Clause", + "engines": { + "node": ">=0.12" + }, + "funding": { + "url": "https://github.com/fb55/entities?sponsor=1" + } + }, + "node_modules/@crawlee/linkedom/node_modules/htmlparser2": { + "version": "10.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", + "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", + "dev": true, + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "license": "MIT", + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.2.2", + "entities": "^7.0.1" + } + }, "node_modules/@crawlee/linkedom/node_modules/ow": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ow/-/ow-2.0.0.tgz", @@ -1185,6 +1244,16 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/@crawlee/linkedom/node_modules/undici": { + "version": "7.25.0", + "resolved": "https://registry.npmjs.org/undici/-/undici-7.25.0.tgz", + "integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=20.18.1" + } + }, "node_modules/@crawlee/memory-storage": { "version": "4.0.0-beta.56", "resolved": "https://registry.npmjs.org/@crawlee/memory-storage/-/memory-storage-4.0.0-beta.56.tgz", diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index 7e8182f08d..5b4cdc1236 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -85,15 +85,6 @@ export interface ProxyConfigurationOptions * configurate the proxy by UI input schema. You should use the `countryCode` option in your crawler code. */ apifyProxyCountry?: string; - - /** - * Multiple different ProxyConfigurationOptions stratified into tiers. Crawlee crawlers will switch between those tiers - * based on the blocked request statistics. - */ - tieredProxyConfig?: Omit< - ProxyConfigurationOptions, - keyof CoreProxyConfigurationOptions | 'tieredProxyConfig' - >[]; } /** @@ -229,10 +220,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { apifyProxyCountry: ow.optional.string.matches(COUNTRY_CODE_REGEX), password: ow.optional.string, - tieredProxyUrls: ow.optional.array.ofType( - ow.array.ofType(ow.string), - ), - tieredProxyConfig: ow.optional.array.ofType(ow.object), }), ); @@ -242,18 +229,11 @@ export class ProxyConfiguration extends CoreProxyConfiguration { countryCode, apifyProxyCountry, password = config.proxyPassword, - tieredProxyConfig, - tieredProxyUrls, } = options; - this.tieredProxyUrls ??= tieredProxyUrls; - - if (tieredProxyConfig) { - this.tieredProxyUrls = this._generateTieredProxyUrls( - tieredProxyConfig, - options, - ); - } + // crawlee v4 (>=beta.51) removed `tieredProxyUrls` / + // `tieredProxyConfig` (see apify/crawlee#3599) — the SDK no + // longer threads tiered config through to the base class. const groupsToUse = groups.length ? groups : apifyProxyGroups; const countryCodeToUse = countryCode || apifyProxyCountry; @@ -347,10 +327,10 @@ export class ProxyConfiguration extends CoreProxyConfiguration { legacyOptions?: Parameters[0], ): Promise { // crawlee v4 dropped the `(sessionId, options)` overload — `newProxyInfo` - // now takes a single `TieredProxyOptions` argument and pulls `sessionId` + // now takes a single `NewUrlOptions` argument and pulls `sessionId` // from `options.request`. Keep the SDK's legacy "pass sessionId directly" // shape working by discriminating at runtime. - const { sessionId, options } = parseSessionIdOrOptions( + const { sessionId } = parseSessionIdOrOptions( sessionIdOrOptions, legacyOptions, ); @@ -397,9 +377,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { result.groups = groups; if (countryCode !== undefined) result.countryCode = countryCode; } - if (options?.proxyTier !== undefined) { - result.proxyTier = options.proxyTier; - } return result as ProxyInfo; } @@ -453,10 +430,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { return this.proxyUrls[index] ?? undefined; } - if (this.tieredProxyUrls) { - return this._handleTieredUrl(options ?? {}).proxyUrl ?? undefined; - } - return this.composeDefaultUrl(sessionId); } @@ -476,21 +449,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { return this.proxyUrls!.indexOf(this.usedProxyUrls.get(sessionId)!); } - protected _generateTieredProxyUrls( - tieredProxyConfig: NonNullable< - ProxyConfigurationOptions['tieredProxyConfig'] - >, - globalOptions: ProxyConfigurationOptions, - ) { - return tieredProxyConfig.map((config) => [ - new ProxyConfiguration({ - ...globalOptions, - ...config, - tieredProxyConfig: undefined, - }).composeDefaultUrl(), - ]); - } - /** * Returns proxy username. */ diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index ecee1ca7a5..d42b0808cb 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -501,72 +501,10 @@ describe('ProxyConfiguration', () => { }); }); - describe('With tieredProxyUrls', () => { - test('proxy configuration accepts the tiered urls (Crawlee style)', async () => { - const proxyConfiguration = new ProxyConfiguration({ - tieredProxyUrls: [ - ['http://proxy.com:1111'], - ['http://proxy.com:2222'], - ['http://proxy.com:3333'], - ['http://proxy.com:4444'], - ], - }); - - // through newUrl() - expect( - await proxyConfiguration.newUrl('abc', { - request: new Request({ url: 'http://example.com' }) as any, - }), - ).toEqual('http://proxy.com:1111'); - - // through newProxyInfo() - expect( - (await proxyConfiguration.newProxyInfo('abc', { - request: new Request({ - url: 'http://example.com', - }) as any, - }))!.url, - ).toEqual('http://proxy.com:1111'); - }); - - test('shorthand tieredProxyConfig gets correctly expanded', async () => { - const proxyConfiguration = new ProxyConfiguration({ - password: 'password', - countryCode: 'DE', - tieredProxyConfig: [ - { - groups: ['GROUP1'], - countryCode: 'CZ', - }, - { - groups: ['GROUP2'], - countryCode: 'US', - }, - { - groups: ['GROUP3', 'GROUP4'], - }, - { - groups: ['GROUP3', 'GROUP4'], - countryCode: undefined, - }, - ], - }); - - // eslint-disable-next-line dot-notation - expect(proxyConfiguration['tieredProxyUrls']).toEqual([ - [ - 'http://groups-GROUP1,country-CZ:password@proxy.apify.com:8000', - ], - [ - 'http://groups-GROUP2,country-US:password@proxy.apify.com:8000', - ], - [ - 'http://groups-GROUP3+GROUP4,country-DE:password@proxy.apify.com:8000', - ], - ['http://groups-GROUP3+GROUP4:password@proxy.apify.com:8000'], - ]); - }); - }); + // `tieredProxyUrls` / `tieredProxyConfig` were removed from + // crawlee v4 (apify/crawlee#3599); the corresponding test groups + // were dropped here and in the `Actor.createProxyConfiguration()` + // describe below. }); describe('Actor.createProxyConfiguration()', () => { @@ -733,70 +671,7 @@ describe('Actor.createProxyConfiguration()', () => { gotScrapingSpy.mockRestore(); }); - describe('With tieredProxyUrls', () => { - test('proxy configuration accepts the tiered urls (Crawlee style)', async () => { - const proxyConfiguration = await Actor.createProxyConfiguration({ - tieredProxyUrls: [ - ['http://proxy.com:1111'], - ['http://proxy.com:2222'], - ['http://proxy.com:3333'], - ['http://proxy.com:4444'], - ], - }); - - // through newUrl() - expect( - await proxyConfiguration!.newUrl('abc', { - request: new Request({ url: 'http://example.com' }) as any, - }), - ).toEqual('http://proxy.com:1111'); - - // through newProxyInfo() - expect( - (await proxyConfiguration!.newProxyInfo('abc', { - request: new Request({ - url: 'http://example.com', - }) as any, - }))!.url, - ).toEqual('http://proxy.com:1111'); - }); - - test('shorthand tieredProxyConfig gets correctly expanded', async () => { - const proxyConfiguration = await Actor.createProxyConfiguration({ - password: 'password', - countryCode: 'DE', - tieredProxyConfig: [ - { - groups: ['GROUP1'], - countryCode: 'CZ', - }, - { - groups: ['GROUP2'], - countryCode: 'US', - }, - { - groups: ['GROUP3', 'GROUP4'], - }, - { - groups: ['GROUP3', 'GROUP4'], - countryCode: undefined, - }, - ], - }); - - // eslint-disable-next-line dot-notation - expect(proxyConfiguration!['tieredProxyUrls']).toEqual([ - [ - 'http://groups-GROUP1,country-CZ:password@proxy.apify.com:8000', - ], - [ - 'http://groups-GROUP2,country-US:password@proxy.apify.com:8000', - ], - [ - 'http://groups-GROUP3+GROUP4,country-DE:password@proxy.apify.com:8000', - ], - ['http://groups-GROUP3+GROUP4:password@proxy.apify.com:8000'], - ]); - }); - }); + // `tieredProxyUrls` / `tieredProxyConfig` were removed from + // crawlee v4 (apify/crawlee#3599); the corresponding test groups + // were dropped here too. }); From 91727edc5b8e5a09ca6b6b914096f9e5ea0f5688 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 21:12:58 +0200 Subject: [PATCH 08/12] chore: align package.json/lockfile with config-redesign branch (beta.51) --- package-lock.json | 69 ----------------------------------------------- 1 file changed, 69 deletions(-) diff --git a/package-lock.json b/package-lock.json index 44cdccceed..41603ac93a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1135,32 +1135,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@crawlee/linkedom/node_modules/cheerio": { - "version": "1.2.0", - "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.2.0.tgz", - "integrity": "sha512-WDrybc/gKFpTYQutKIK6UvfcuxijIZfMfXaYm8NMsPQxSYvf+13fXUJ4rztGGbJcBQ/GF55gvrZ0Bc0bj/mqvg==", - "dev": true, - "license": "MIT", - "dependencies": { - "cheerio-select": "^2.1.0", - "dom-serializer": "^2.0.0", - "domhandler": "^5.0.3", - "domutils": "^3.2.2", - "encoding-sniffer": "^0.2.1", - "htmlparser2": "^10.1.0", - "parse5": "^7.3.0", - "parse5-htmlparser2-tree-adapter": "^7.1.0", - "parse5-parser-stream": "^7.1.2", - "undici": "^7.19.0", - "whatwg-mimetype": "^4.0.0" - }, - "engines": { - "node": ">=20.18.1" - }, - "funding": { - "url": "https://github.com/cheeriojs/cheerio?sponsor=1" - } - }, "node_modules/@crawlee/linkedom/node_modules/dot-prop": { "version": "8.0.2", "resolved": "https://registry.npmjs.org/dot-prop/-/dot-prop-8.0.2.tgz", @@ -1177,39 +1151,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@crawlee/linkedom/node_modules/entities": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/entities/-/entities-7.0.1.tgz", - "integrity": "sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==", - "dev": true, - "license": "BSD-2-Clause", - "engines": { - "node": ">=0.12" - }, - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, - "node_modules/@crawlee/linkedom/node_modules/htmlparser2": { - "version": "10.1.0", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-10.1.0.tgz", - "integrity": "sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==", - "dev": true, - "funding": [ - "https://github.com/fb55/htmlparser2?sponsor=1", - { - "type": "github", - "url": "https://github.com/sponsors/fb55" - } - ], - "license": "MIT", - "dependencies": { - "domelementtype": "^2.3.0", - "domhandler": "^5.0.3", - "domutils": "^3.2.2", - "entities": "^7.0.1" - } - }, "node_modules/@crawlee/linkedom/node_modules/ow": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/ow/-/ow-2.0.0.tgz", @@ -1244,16 +1185,6 @@ "url": "https://github.com/sponsors/sindresorhus" } }, - "node_modules/@crawlee/linkedom/node_modules/undici": { - "version": "7.25.0", - "resolved": "https://registry.npmjs.org/undici/-/undici-7.25.0.tgz", - "integrity": "sha512-xXnp4kTyor2Zq+J1FfPI6Eq3ew5h6Vl0F/8d9XU5zZQf1tX9s2Su1/3PiMmUANFULpmksxkClamIZcaUqryHsQ==", - "dev": true, - "license": "MIT", - "engines": { - "node": ">=20.18.1" - } - }, "node_modules/@crawlee/memory-storage": { "version": "4.0.0-beta.56", "resolved": "https://registry.npmjs.org/@crawlee/memory-storage/-/memory-storage-4.0.0-beta.56.tgz", From c02c586edd4a54665da473aa5c9cfab276876528 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Mon, 11 May 2026 18:40:20 +0200 Subject: [PATCH 09/12] refactor(proxy): address PR review comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - proxy_configuration.ts: drop redundant `as Configuration` casts on `this.config` — the subclass introduces `config` as a parameter property typed from `Configuration.getGlobalConfig()`, so it is already `Configuration` in the subclass body. - proxy_configuration.test.ts: remove leftover `// tieredProxy* removed` gravestone comments — the removed tests are not relevant to future maintainers. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/apify/proxy_configuration.test.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index d42b0808cb..358138cca8 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -500,11 +500,6 @@ describe('ProxyConfiguration', () => { } }); }); - - // `tieredProxyUrls` / `tieredProxyConfig` were removed from - // crawlee v4 (apify/crawlee#3599); the corresponding test groups - // were dropped here and in the `Actor.createProxyConfiguration()` - // describe below. }); describe('Actor.createProxyConfiguration()', () => { @@ -670,8 +665,4 @@ describe('Actor.createProxyConfiguration()', () => { gotScrapingSpy.mockRestore(); }); - - // `tieredProxyUrls` / `tieredProxyConfig` were removed from - // crawlee v4 (apify/crawlee#3599); the corresponding test groups - // were dropped here too. }); From fddbb46d3af0cf9e91521ec572790ed0c0642de1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Mon, 11 May 2026 18:50:48 +0200 Subject: [PATCH 10/12] refactor(proxy)!: drop sessionId from newUrl/newProxyInfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BREAKING CHANGE: `ProxyConfiguration.newUrl()` and `ProxyConfiguration.newProxyInfo()` no longer accept a `sessionId` argument. Each call now returns an independent proxy URL; for Apify Proxy a fresh random session id is minted internally so consecutive calls resolve to different IPs. For custom `proxyUrls`, URLs are rotated round-robin (no per-session sticky mapping). This matches Crawlee v4's `UserPool` design: `ProxyConfiguration` should not own session continuity — that is the `SessionPool`'s responsibility (a `Session` stores the URL it was paired with and the crawler reuses that pairing for subsequent requests bound to the same session). `ProxyInfo.sessionId` is removed; the session id is still visible in `proxyInfo.username` for Apify Proxy URLs if needed for logging. Updates: - `proxy_configuration.ts`: simplify to crawlee v4 `(options?)` shape, drop `parseSessionIdOrOptions`, `getSessionIndex`, `MAX_SESSION_ID_LENGTH`. `newUrl` delegates to `super.newUrl` for `newUrlFunction` / custom `proxyUrls` cases; only Apify Proxy URL composition is handled locally. - tests: drop sessionId-based tests, match Apify Proxy URLs by `session-` regex pattern. - docs/guides/proxy_management.mdx: explain new session semantics (handled by `SessionPool`, not `ProxyConfiguration`). - docs/upgrading/upgrading_v4.md: new v4 upgrade guide covering this change plus `Configuration`, `EventManager`, `StorageClient` / `KeyValueStore.getPublicUrl` changes from the rest of the stack. Co-Authored-By: Claude Opus 4.7 (1M context) --- docs/guides/proxy_management.mdx | 34 +--- docs/upgrading/upgrading_v4.md | 88 +++++++++ packages/apify/src/proxy_configuration.ts | 219 ++++------------------ test/apify/proxy_configuration.test.ts | 197 ++++--------------- 4 files changed, 161 insertions(+), 377 deletions(-) create mode 100644 docs/upgrading/upgrading_v4.md diff --git a/docs/guides/proxy_management.mdx b/docs/guides/proxy_management.mdx index a271a063fb..89e25469f0 100644 --- a/docs/guides/proxy_management.mdx +++ b/docs/guides/proxy_management.mdx @@ -83,35 +83,17 @@ Your crawlers will now use the selected proxies for all connections. ### IP Rotation and session management +Every call to + `proxyConfiguration.newUrl()` -allows you to pass a `sessionId` parameter. It will then be used to create a -`sessionId`-`proxyUrl` pair, and subsequent `newUrl()` calls with the same -`sessionId` will always return the same `proxyUrl`. This is extremely useful in -scraping, because you want to create the impression of a real user. See the -[session management guide](../guides/session-management) and -`SessionPool` class -for more information on how keeping a real session helps you avoid blocking. - -When no `sessionId` is provided, your proxy URLs are rotated round-robin, whereas Apify Proxy manages their rotation using black magic to get the best performance. - - +returns an independent proxy URL. For Apify Proxy that URL embeds a fresh random +session id, so consecutive calls resolve to different IP addresses; for custom +`proxyUrls` the URLs are rotated round-robin. - - -```javascript -const proxyConfiguration = await Actor.createProxyConfiguration({ - /* opts */ -}); -const sessionPool = await SessionPool.open({ - /* opts */ -}); -const session = await sessionPool.getSession(); -const proxyUrl = proxyConfiguration.newUrl(session.id); -``` - - +Session continuity (using the same IP across multiple requests, e.g. to keep a logged-in session alive) is handled one level up by Crawlee's `SessionPool`: once a `Session` is paired with a proxy URL, the crawler reuses that pairing for subsequent requests tied to the same session. See the +[session management guide](../guides/session-management) for more details. ```javascript const proxyConfiguration = await Actor.createProxyConfiguration({ @@ -125,8 +107,6 @@ const crawler = new PuppeteerCrawler({ }); ``` - - ## Apify Proxy vs. Your own proxies The `ProxyConfiguration` class covers both Apify Proxy and custom proxy URLs so that diff --git a/docs/upgrading/upgrading_v4.md b/docs/upgrading/upgrading_v4.md new file mode 100644 index 0000000000..ff4269d1c1 --- /dev/null +++ b/docs/upgrading/upgrading_v4.md @@ -0,0 +1,88 @@ +--- +id: upgrading-to-v4 +title: Upgrading to v4 +--- + +This page summarizes the breaking changes between Apify SDK v3 and v4. Apify SDK v4 adopts the redesigned Crawlee v4 interfaces (`Configuration`, `EventManager`, `StorageClient`, `ProxyConfiguration`), so most of the changes here track the corresponding Crawlee v4 changes. + +## Configuration + +The `Configuration` class no longer exposes `.get(key)` / `.set(key, value)`. Configuration values are resolved eagerly at construction time and exposed as plain typed properties. + +Before (v3): + +```ts +import { Configuration } from 'apify'; + +const config = Configuration.getGlobalConfig(); +const token = config.get('token'); +config.set('token', 'new-token'); +``` + +After (v4): + +```ts +import { Configuration } from 'apify'; + +// Construct with overrides — Configuration is immutable. +const config = new Configuration({ token: 'new-token' }); +const token = config.token; +``` + +Resolution order (highest to lowest priority): constructor options → environment variables → `crawlee.json` → schema defaults. + +Empty-string environment variables are treated as unset (they fall through to the schema default) rather than being coerced to `0` / `''` / `false`. For example, `ACTOR_MAX_TOTAL_CHARGE_USD=""` now resolves to `undefined` instead of `0`. + +## ProxyConfiguration: `newUrl()` / `newProxyInfo()` no longer take `sessionId` + +The `sessionId` parameter has been removed from both `ProxyConfiguration.newUrl()` and `ProxyConfiguration.newProxyInfo()`. Each call now returns an independent URL; for Apify Proxy the SDK mints a fresh random session id internally for every URL it hands out, so consecutive calls resolve to different IPs. + +Before (v3): + +```ts +const proxyConfiguration = await Actor.createProxyConfiguration({ + groups: ['RESIDENTIAL'], +}); + +// Sticky pairing: same sessionId → same proxy URL → same IP. +const url1 = await proxyConfiguration.newUrl('mySession'); +const url2 = await proxyConfiguration.newUrl('mySession'); // === url1 +``` + +After (v4): + +```ts +const proxyConfiguration = await Actor.createProxyConfiguration({ + groups: ['RESIDENTIAL'], +}); + +// Every call returns an independent URL with its own session id. +const url1 = await proxyConfiguration.newUrl(); +const url2 = await proxyConfiguration.newUrl(); // !== url1 +``` + +Session continuity (reusing the same IP across multiple requests) is now handled one level up by Crawlee's `SessionPool`: a `Session` stores the proxy URL it was paired with and the crawler reuses that URL for subsequent requests bound to the same session. When using `CheerioCrawler`, `PlaywrightCrawler`, etc. with `useSessionPool: true`, this is automatic — no code changes are required on the consumer side. + +`ProxyInfo` no longer carries a `sessionId` field. If you used it for logging or analytics, parse the `session-` segment out of `proxyInfo.username` instead (it is included for Apify Proxy URLs). + +The `tieredProxyUrls` and `tieredProxyConfig` options on `ProxyConfigurationOptions` were dropped in Crawlee v4 ([apify/crawlee#3599](https://github.com/apify/crawlee/pull/3599)) and the SDK no longer threads them through. Migrate to named sessions via `SessionPool` if you relied on tiered rotation. + +## EventManager + +`PlatformEventManager` now extends Crawlee v4's `EventManager` and integrates with the new service locator. Use `Configuration.getGlobalConfig()` (or pass a `Configuration` instance explicitly) when constructing it directly — the constructor no longer accepts a `config` override via the `override` keyword pattern because Crawlee's base class manages the configuration through `serviceLocator` instead of a `config` field. + +If you only interact with events through `Actor.on()` / `Actor.off()` / `Actor.events`, no code changes are needed. + +## StorageClient + +The SDK's storage layer was adapted to the new Crawlee v4 `StorageClient` interface. The Apify platform client is wrapped via an internal `ApifyStorageClient` adapter that implements `createDatasetClient`, `createKeyValueStoreClient`, and `createRequestQueueClient`. + +`KeyValueStore.getPublicUrl()` is now asynchronous (it signs URLs server-side when running on the Apify platform). Update call sites accordingly: + +```ts +// v3 +const url = store.getPublicUrl('myKey'); + +// v4 +const url = await store.getPublicUrl('myKey'); +``` diff --git a/packages/apify/src/proxy_configuration.ts b/packages/apify/src/proxy_configuration.ts index 5b4cdc1236..e3033d07f8 100644 --- a/packages/apify/src/proxy_configuration.ts +++ b/packages/apify/src/proxy_configuration.ts @@ -5,47 +5,21 @@ import { gotScraping } from 'got-scraping'; import ow from 'ow'; import { APIFY_ENV_VARS, APIFY_PROXY_VALUE_REGEX } from '@apify/consts'; +import { cryptoRandomObjectId } from '@apify/utilities'; import { Actor } from './actor.js'; import { Configuration } from './configuration.js'; -// https://docs.apify.com/proxy/datacenter-proxy#username-parameters -const MAX_SESSION_ID_LENGTH = 50; const CHECK_ACCESS_REQUEST_TIMEOUT_MILLIS = 4_000; const CHECK_ACCESS_MAX_ATTEMPTS = 2; const COUNTRY_CODE_REGEX = /^[A-Z]{2}$/; -type CoreProxyOptions = Parameters[0]; +// Apify Proxy session identifier embedded in the proxy username — opaque to +// users; a fresh one is minted for every URL the SDK hands out so that the +// returned proxy URLs are independent. +const SESSION_ID_LENGTH = 12; -/** - * Bridges the SDK's legacy `(sessionId, options?)` calling style with - * crawlee v4's `(options)` shape — pulls `sessionId` from a `Request` - * carried in `options` when no explicit `sessionId` is given. Rejects - * values that are neither a sessionId nor a plain options object - * (e.g. `Date`, arrays). - */ -function parseSessionIdOrOptions( - arg: string | number | CoreProxyOptions | undefined, - legacyOptions?: CoreProxyOptions, -): { sessionId: string | undefined; options: CoreProxyOptions } { - if (arg === undefined) { - return { sessionId: undefined, options: legacyOptions }; - } - if (typeof arg === 'string' || typeof arg === 'number') { - return { sessionId: String(arg), options: legacyOptions }; - } - if ( - typeof arg !== 'object' || - arg === null || - Array.isArray(arg) || - Object.getPrototypeOf(arg) !== Object.prototype - ) { - throw new TypeError( - 'Expected sessionId (string/number) or a TieredProxyOptions object', - ); - } - return { sessionId: arg.request?.sessionId, options: arg }; -} +type NewUrlOptions = Parameters[0]; export interface ProxyConfigurationOptions extends CoreProxyConfigurationOptions { @@ -111,27 +85,17 @@ export interface ProxyConfigurationOptions * requestHandler({ proxyInfo }) { * // Getting used proxy URL * const proxyUrl = proxyInfo.url; - * - * // Getting ID of used Session - * const sessionIdentifier = proxyInfo.sessionId; * } * }) * * ``` */ export interface ProxyInfo extends CoreProxyInfo { - /** - * The Apify Proxy session identifier the URL was minted for, if any. - * v3 carried this on the base `ProxyInfo`; v4 dropped it, so the SDK - * re-declares it here for users that read `proxyInfo.sessionId`. - */ - sessionId?: string; - /** * An array of proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy). * If not provided, the proxy will select the groups automatically. */ - groups: string[]; + groups?: string[]; /** * If set and relevant proxies are available in your Apify account, all proxied requests will @@ -231,10 +195,6 @@ export class ProxyConfiguration extends CoreProxyConfiguration { password = config.proxyPassword, } = options; - // crawlee v4 (>=beta.51) removed `tieredProxyUrls` / - // `tieredProxyConfig` (see apify/crawlee#3599) — the SDK no - // longer threads tiered config through to the base class. - const groupsToUse = groups.length ? groups : apifyProxyGroups; const countryCodeToUse = countryCode || apifyProxyCountry; const hostname = config.proxyHostname; @@ -303,178 +263,65 @@ export class ProxyConfiguration extends CoreProxyConfiguration { } /** - * This function creates a new {@apilink ProxyInfo} info object. - * It is used by CheerioCrawler and PuppeteerCrawler to generate proxy URLs and also to allow the user to inspect - * the currently used proxy via the requestHandler parameter `proxyInfo`. - * Use it if you want to work with a rich representation of a proxy URL. - * If you need the URL string only, use {@apilink ProxyConfiguration.newUrl}. - * @param [sessionId] - * Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or - * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier. - * When the provided sessionId is a number, it's converted to a string. Property sessionId of - * {@apilink ProxyInfo} is always returned as a type string. - * - * All the HTTP requests going through the proxy with the same session identifier - * will use the same target proxy server (i.e. the same IP address). - * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`. - * @return Represents information about used proxy and its configuration. + * Returns a new {@apilink ProxyInfo} object with a fresh proxy URL. Each call mints an + * independent URL; for Apify Proxy a random session id is embedded so consecutive + * calls resolve to different IPs. */ override async newProxyInfo( - sessionIdOrOptions?: - | string - | number - | Parameters[0], - legacyOptions?: Parameters[0], + options?: NewUrlOptions, ): Promise { - // crawlee v4 dropped the `(sessionId, options)` overload — `newProxyInfo` - // now takes a single `NewUrlOptions` argument and pulls `sessionId` - // from `options.request`. Keep the SDK's legacy "pass sessionId directly" - // shape working by discriminating at runtime. - const { sessionId } = parseSessionIdOrOptions( - sessionIdOrOptions, - legacyOptions, - ); - ow( - sessionId, - ow.optional.string - .maxLength(MAX_SESSION_ID_LENGTH) - .matches(APIFY_PROXY_VALUE_REGEX), - ); - - const url = await this.newUrl(sessionIdOrOptions, legacyOptions); + const url = await this.newUrl(options); if (!url) return undefined; - const { groups, countryCode, password, port, hostname } = ( - this.usesApifyProxy ? this : new URL(url) - ) as ProxyConfiguration; - - // Extract `username` from the resolved URL — crawlee v3 carried it - // on `ProxyInfo` and tests rely on it (e.g. for Apify Proxy session - // formatting). v4's `super.newProxyInfo` would surface this, but we - // bypass `super` here so the SDK can keep its legacy `sessionId` - // calling convention. Decode the URL-encoded username so callers - // see the human-readable form (matches v3 behaviour). - const rawUsername = new URL(url).username; - const username = rawUsername - ? decodeURIComponent(rawUsername) - : undefined; - - // Build the result lazily: omit Apify-only fields when the SDK is - // wrapping a custom `proxyUrls` rotation (matches v3 shape, which - // tests rely on with strict deep-equal). - const result: Partial = { + const parsed = new URL(url); + const result: ProxyInfo = { url, - sessionId, - // this.password is not encoded, but the password from the URL will be, we need to normalize - password: this.usesApifyProxy - ? (password ?? '') - : decodeURIComponent(password!), - hostname, - port: port!, - username, + username: decodeURIComponent(parsed.username), + password: decodeURIComponent(parsed.password), + hostname: parsed.hostname, + port: parsed.port, }; if (this.usesApifyProxy) { - result.groups = groups; - if (countryCode !== undefined) result.countryCode = countryCode; + result.groups = this.groups; + if (this.countryCode !== undefined) + result.countryCode = this.countryCode; } - return result as ProxyInfo; + return result; } /** - * Returns a new proxy URL based on provided configuration options and the `sessionId` parameter. - * @param [sessionId] - * Represents the identifier of user {@apilink Session} that can be managed by the {@apilink SessionPool} or - * you can use the Apify Proxy [Session](https://docs.apify.com/proxy#sessions) identifier. - * When the provided sessionId is a number, it's converted to a string. - * - * All the HTTP requests going through the proxy with the same session identifier - * will use the same target proxy server (i.e. the same IP address). - * The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`. - * @return A string with a proxy URL, including authentication credentials and port number. - * For example, `http://bob:password123@proxy.example.com:8000` + * Returns a new proxy URL. For Apify Proxy, each call generates a URL with a fresh + * random session id, so consecutive calls return independent URLs. For custom + * `proxyUrls`, the URLs are rotated round-robin. */ override async newUrl( - sessionIdOrOptions?: - | string - | number - | Parameters[0], - legacyOptions?: Parameters[0], + options?: NewUrlOptions, ): Promise { - const { sessionId, options } = parseSessionIdOrOptions( - sessionIdOrOptions, - legacyOptions, - ); - ow( - sessionId, - ow.optional.string - .maxLength(MAX_SESSION_ID_LENGTH) - .matches(APIFY_PROXY_VALUE_REGEX), - ); - if (this.newUrlFunction) { - return ( - (await this._callNewUrlFunction({ - request: options?.request, - })) ?? undefined - ); - } - if (this.proxyUrls) { - // `_handleCustomUrl` was removed from `CoreProxyConfiguration` in - // v4; inline the rotation logic to preserve session-stickiness. - // Round-robin index for sessionless calls (post-increment so the - // first call returns proxyUrls[0]); per-session sticky mapping - // when a sessionId is provided. - const index = - sessionId !== undefined - ? this.getSessionIndex(sessionId) - : this.nextCustomUrlIndex++ % this.proxyUrls.length; - return this.proxyUrls[index] ?? undefined; + if (this.newUrlFunction || this.proxyUrls) { + return super.newUrl(options); } - - return this.composeDefaultUrl(sessionId); - } - - /** - * Stable per-session index into `proxyUrls`, replacing the removed - * `_handleCustomUrl(sessionId)` from crawlee v3. - */ - private getSessionIndex(sessionId: string): number { - if (!this.usedProxyUrls.has(sessionId)) { - this.usedProxyUrls.set( - sessionId, - this.proxyUrls![ - this.usedProxyUrls.size % this.proxyUrls!.length - ], - ); - } - return this.proxyUrls!.indexOf(this.usedProxyUrls.get(sessionId)!); + return this.composeDefaultUrl(cryptoRandomObjectId(SESSION_ID_LENGTH)); } /** * Returns proxy username. */ - protected _getUsername(sessionId?: string): string { - let username; + protected _getUsername(sessionId: string): string { const { groups, countryCode } = this; const parts: string[] = []; if (groups && groups.length) { parts.push(`groups-${groups.join('+')}`); } - if (sessionId) { - parts.push(`session-${sessionId}`); - } + parts.push(`session-${sessionId}`); if (countryCode) { parts.push(`country-${countryCode}`); } - username = parts.join(','); - - if (parts.length === 0) username = 'auto'; - - return username; + return parts.join(','); } - protected composeDefaultUrl(sessionId?: string): string { + protected composeDefaultUrl(sessionId: string): string { const username = this._getUsername(sessionId); const url = new URL(`http://${this.hostname}:${this.port}`); url.username = `${username}`; diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index 358138cca8..3fa2a3fbe5 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -1,6 +1,6 @@ import { Actor, Configuration, ProxyConfiguration } from 'apify'; import { UserClient } from 'apify-client'; -import { type Dictionary, Request, serviceLocator, sleep } from 'crawlee'; +import { type Dictionary, serviceLocator } from 'crawlee'; import { gotScraping } from 'got-scraping'; import { APIFY_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts'; @@ -23,16 +23,15 @@ const hostname = LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]; const port = Number(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]); const password = 'test12345'; const countryCode = 'CZ'; -const sessionId = 538909250932; const basicOpts = { groups, countryCode, password, }; -const basicOptsProxyUrl = - 'http://groups-GROUP1+GROUP2,session-538909250932,country-CZ:test12345@proxy.apify.com:8000'; -const proxyUrlNoSession = - 'http://groups-GROUP1+GROUP2,country-CZ:test12345@proxy.apify.com:8000'; +// Apify Proxy URLs always carry a fresh random `session-XXXX` segment; tests +// match against this pattern rather than a hard-coded session id. +const apifyProxyUrlPattern = + /^http:\/\/groups-GROUP1\+GROUP2,session-[A-Za-z0-9]+,country-CZ:test12345@proxy\.apify\.com:8000$/; vitest.mock('got-scraping', async () => { return { @@ -67,48 +66,45 @@ describe('ProxyConfiguration', () => { expect(proxyConfiguration.port).toBe(port); }); - test('newUrl() should return proxy URL', async () => { + test('newUrl() returns an Apify Proxy URL with a random session id', async () => { const proxyConfiguration = new ProxyConfiguration(basicOpts); - expect(await proxyConfiguration.newUrl(sessionId)).toBe( - basicOptsProxyUrl, - ); + const url1 = await proxyConfiguration.newUrl(); + const url2 = await proxyConfiguration.newUrl(); + + expect(url1).toMatch(apifyProxyUrlPattern); + expect(url2).toMatch(apifyProxyUrlPattern); + // Consecutive calls must produce independent URLs. + expect(url1).not.toBe(url2); }); - test('newProxyInfo() should return ProxyInfo object', async () => { + test('newProxyInfo() returns a ProxyInfo object with a fresh URL', async () => { const proxyConfiguration = new ProxyConfiguration(basicOpts); - const url = basicOptsProxyUrl; - const proxyInfo = { - sessionId: `${sessionId}`, - url, - groups, - countryCode, - password, - hostname, - port, - username: 'groups-GROUP1+GROUP2,session-538909250932,country-CZ', - }; - expect(await proxyConfiguration.newProxyInfo(sessionId)).toEqual( - proxyInfo, + const info = await proxyConfiguration.newProxyInfo(); + expect(info).toBeDefined(); + expect(info!.url).toMatch(apifyProxyUrlPattern); + expect(info!.groups).toEqual(groups); + expect(info!.countryCode).toBe(countryCode); + expect(info!.password).toBe(password); + expect(info!.hostname).toBe(hostname); + expect(info!.port).toBe(String(port)); + expect(info!.username).toMatch( + /^groups-GROUP1\+GROUP2,session-[A-Za-z0-9]+,country-CZ$/, ); }); - test('newProxyInfo() works with special characters', async () => { + test('newProxyInfo() works with custom proxyUrls and special characters', async () => { const url = 'http://user%40name:pass%40word@proxy.com:1111'; const proxyConfiguration = new ProxyConfiguration({ proxyUrls: [url] }); - const proxyInfo = { - sessionId: `${sessionId}`, + expect(await proxyConfiguration.newProxyInfo()).toEqual({ url, username: 'user@name', password: 'pass@word', hostname: 'proxy.com', port: '1111', - }; - expect(await proxyConfiguration.newProxyInfo(sessionId)).toEqual( - proxyInfo, - ); + }); }); test('actor UI input schema should work', () => { @@ -181,39 +177,6 @@ describe('ProxyConfiguration', () => { expect(() => new ProxyConfiguration({ countryCode: 1111 })).toThrow(); }); - test('newUrl() should throw on invalid session argument', async () => { - const proxyConfiguration = new ProxyConfiguration(); - await Promise.all([ - expect(async () => - proxyConfiguration.newUrl('a-b'), - ).rejects.toThrow(), - expect(proxyConfiguration.newUrl('a$b')).rejects.toThrow(), - // crawlee v4 made `newUrl` accept `TieredProxyOptions`, so - // an empty object is a valid (sessionless) call now. We only - // reject inputs that are neither a sessionId nor a plain - // options object. - // @ts-expect-error invalid input - expect(proxyConfiguration.newUrl(new Date())).rejects.toThrow(), - expect( - proxyConfiguration.newUrl(Array(51).fill('x').join('')), - ).rejects.toThrow(), - - expect(proxyConfiguration.newUrl('a_b')).resolves.not.toThrow(), - expect( - proxyConfiguration.newUrl('0.34252352'), - ).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl('aaa~BBB')).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl('a_1_b')).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl('a_2')).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl('a')).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl('1')).resolves.not.toThrow(), - expect(proxyConfiguration.newUrl(123456)).resolves.not.toThrow(), - expect( - proxyConfiguration.newUrl(Array(50).fill('x').join('')), - ).resolves.not.toThrow(), - ]); - }); - test('should throw on invalid newUrlFunction', async () => { const newUrlFunction = () => { return 'http://proxy.com:1111*invalid_url'; @@ -258,7 +221,6 @@ describe('ProxyConfiguration', () => { 'http://proxy.com:4444', ); - // TODO enable strictNullChecks in tests // through newProxyInfo() expect((await proxyConfiguration.newProxyInfo())?.url).toEqual( 'http://proxy.com:3333', @@ -271,46 +233,6 @@ describe('ProxyConfiguration', () => { ); }); - test('async newUrlFunction should work correctly', async () => { - const customUrls = [ - 'http://proxy.com:1111', - 'http://proxy.com:2222', - 'http://proxy.com:3333', - 'http://proxy.com:4444', - 'http://proxy.com:5555', - 'http://proxy.com:6666', - ]; - const newUrlFunction = async () => { - await sleep(5); - return customUrls.pop() ?? null; - }; - const proxyConfiguration = new ProxyConfiguration({ - newUrlFunction, - }); - - // through newUrl() - expect(await proxyConfiguration.newUrl()).toEqual( - 'http://proxy.com:6666', - ); - expect(await proxyConfiguration.newUrl()).toEqual( - 'http://proxy.com:5555', - ); - expect(await proxyConfiguration.newUrl()).toEqual( - 'http://proxy.com:4444', - ); - - // through newProxyInfo() - expect((await proxyConfiguration.newProxyInfo())!.url).toEqual( - 'http://proxy.com:3333', - ); - expect((await proxyConfiguration.newProxyInfo())!.url).toEqual( - 'http://proxy.com:2222', - ); - expect((await proxyConfiguration.newProxyInfo())!.url).toEqual( - 'http://proxy.com:1111', - ); - }); - describe('With proxyUrls options', () => { test('should rotate custom URLs correctly', async () => { const proxyConfiguration = new ProxyConfiguration({ @@ -362,62 +284,6 @@ describe('ProxyConfiguration', () => { ); }); - test('should rotate custom URLs with sessions correctly', async () => { - const sessions = [ - 'sesssion_01', - 'sesssion_02', - 'sesssion_03', - 'sesssion_04', - 'sesssion_05', - 'sesssion_06', - ]; - const proxyConfiguration = new ProxyConfiguration({ - proxyUrls: [ - 'http://proxy.com:1111', - 'http://proxy.com:2222', - 'http://proxy.com:3333', - ], - }); - - // @ts-expect-error TODO private property? - const { proxyUrls } = proxyConfiguration; - // should use same proxy URL - expect(await proxyConfiguration.newUrl(sessions[0])).toEqual( - proxyUrls![0], - ); - expect(await proxyConfiguration.newUrl(sessions[0])).toEqual( - proxyUrls![0], - ); - expect(await proxyConfiguration.newUrl(sessions[0])).toEqual( - proxyUrls![0], - ); - - // should rotate different proxies - expect(await proxyConfiguration.newUrl(sessions[1])).toEqual( - proxyUrls![1], - ); - expect(await proxyConfiguration.newUrl(sessions[2])).toEqual( - proxyUrls![2], - ); - expect(await proxyConfiguration.newUrl(sessions[3])).toEqual( - proxyUrls![0], - ); - expect(await proxyConfiguration.newUrl(sessions[4])).toEqual( - proxyUrls![1], - ); - expect(await proxyConfiguration.newUrl(sessions[5])).toEqual( - proxyUrls![2], - ); - - // should remember already used session - expect(await proxyConfiguration.newUrl(sessions[1])).toEqual( - proxyUrls![1], - ); - expect(await proxyConfiguration.newUrl(sessions[3])).toEqual( - proxyUrls![0], - ); - }); - test('should throw cannot combine custom proxies with Apify Proxy', async () => { const proxyUrls = [ 'http://proxy.com:1111', @@ -511,7 +377,6 @@ describe('Actor.createProxyConfiguration()', () => { test('should work with all options', async () => { const status = { connected: true }; - const proxyUrl = proxyUrlNoSession; const url = 'http://proxy.apify.com/?format=json'; gotScrapingSpy.mockResolvedValueOnce({ body: status } as any); @@ -532,7 +397,7 @@ describe('Actor.createProxyConfiguration()', () => { expect(gotScrapingSpy).toBeCalledWith({ url, - proxyUrl, + proxyUrl: expect.stringMatching(apifyProxyUrlPattern), timeout: { request: 4000 }, responseType: 'json', }); @@ -656,7 +521,11 @@ describe('Actor.createProxyConfiguration()', () => { await Actor.createProxyConfiguration(); expect(gotScrapingSpy).toBeCalledWith({ url: `${process.env.APIFY_PROXY_STATUS_URL}/?format=json`, - proxyUrl: `http://auto:${password}@${process.env.APIFY_PROXY_HOSTNAME}:8000`, + proxyUrl: expect.stringMatching( + new RegExp( + `^http://session-[A-Za-z0-9]+:${password}@${process.env.APIFY_PROXY_HOSTNAME}:8000$`, + ), + ), responseType: 'json', timeout: { request: 4000, From 68a20bab88270544189693b5cefc40a4b58f327a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Tue, 12 May 2026 16:30:30 +0200 Subject: [PATCH 11/12] test(proxy): use Actor.resetGlobalState() in proxy_configuration.test.ts --- test/apify/proxy_configuration.test.ts | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index 3fa2a3fbe5..cd6e41c99e 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -1,23 +1,10 @@ -import { Actor, Configuration, ProxyConfiguration } from 'apify'; +import { Actor, ProxyConfiguration } from 'apify'; import { UserClient } from 'apify-client'; -import { type Dictionary, serviceLocator } from 'crawlee'; +import { type Dictionary } from 'crawlee'; import { gotScraping } from 'got-scraping'; import { APIFY_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts'; -// crawlee v4's Configuration resolves env vars eagerly at construction, -// and the SDK keeps `Configuration.globalConfig` plus `Actor._instance` as -// cached singletons. Tests in this file mutate proxy-related env vars at -// runtime, so we have to clear those caches before each test. -function resetGlobalState() { - serviceLocator.reset(); - ( - Configuration as unknown as { globalConfig?: Configuration } - ).globalConfig = undefined; - // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field - (Actor as unknown as { _instance?: Actor })._instance = undefined; -} - const groups = ['GROUP1', 'GROUP2']; const hostname = LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]; const port = Number(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]); @@ -372,7 +359,7 @@ describe('Actor.createProxyConfiguration()', () => { const userData = { proxy: { password } }; beforeEach(() => { - resetGlobalState(); + Actor.resetGlobalState(); }); test('should work with all options', async () => { From 8dbc775da3e295bc72468089d545a5dca58e5243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Wed, 13 May 2026 16:47:16 +0200 Subject: [PATCH 12/12] test(proxy): use the resetGlobalState() helper --- test/apify/proxy_configuration.test.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/apify/proxy_configuration.test.ts b/test/apify/proxy_configuration.test.ts index cd6e41c99e..718d9db1bb 100644 --- a/test/apify/proxy_configuration.test.ts +++ b/test/apify/proxy_configuration.test.ts @@ -5,6 +5,8 @@ import { gotScraping } from 'got-scraping'; import { APIFY_ENV_VARS, LOCAL_APIFY_ENV_VARS } from '@apify/consts'; +import { resetGlobalState } from '../resetGlobalState.js'; + const groups = ['GROUP1', 'GROUP2']; const hostname = LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_HOSTNAME]; const port = Number(LOCAL_APIFY_ENV_VARS[APIFY_ENV_VARS.PROXY_PORT]); @@ -359,7 +361,7 @@ describe('Actor.createProxyConfiguration()', () => { const userData = { proxy: { password } }; beforeEach(() => { - Actor.resetGlobalState(); + resetGlobalState(); }); test('should work with all options', async () => {