From 9ff3e794ba6ff83938be5407dd17f4692b965a8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 17:56:43 +0200 Subject: [PATCH 01/12] fix: adapt SDK storage layer to crawlee v4 StorageClient interface MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Crawlee v4 reshaped its `StorageClient` interface (async factory methods that accept `id` *or* `name`), removed the cached `storageObject` from `KeyValueStore`, and made `getPublicUrl` async. The existing SDK code targeted the v3 shape and no longer compiles. Changes: - New `ApifyStorageClient` adapter wraps `apify-client`'s legacy `dataset()/keyValueStore()/requestQueue()` accessors and exposes the `createDatasetClient/createKeyValueStoreClient/createRequestQueueClient` factories crawlee now expects. Names are resolved to IDs via the collection `getOrCreate(name)` calls. apify-client's resource clients don't yet implement v4-only members like `getMetadata` / `getRecordPublicUrl`; the adapter casts through with a TODO comment so the structural alignment can land separately upstream. - `Actor.init` and `_openStorage` now wrap `this.apifyClient` in `ApifyStorageClient` before handing it to crawlee. - `KeyValueStore.getPublicUrl` is now async; the per-store `urlSigningSecretKey` is fetched on demand via the (private) `client.getMetadata()` instead of the removed `storageObject` cache. URL-signing behaviour for platform-mode reads is preserved. - `Actor.openRequestQueue` reads `totalRequestCount` via the new `client.getMetadata()` (the old `client.get()` was dropped). - `StorageManager.openStorage` is now `(class, id?, client?)` — removed the trailing `this.config` argument. Stacked on #583 (config redesign); rebases onto v4 once that lands. 
--- packages/apify/src/actor.ts | 18 +++--- packages/apify/src/apify_storage_client.ts | 72 ++++++++++++++++++++++ packages/apify/src/key_value_store.ts | 38 ++++++++---- 3 files changed, 106 insertions(+), 22 deletions(-) create mode 100644 packages/apify/src/apify_storage_client.ts diff --git a/packages/apify/src/actor.ts b/packages/apify/src/actor.ts index 56a3d3c9cd..05bde349de 100644 --- a/packages/apify/src/actor.ts +++ b/packages/apify/src/actor.ts @@ -47,6 +47,7 @@ import type { ChargeOptions, ChargeResult } from './charging.js'; import { ChargingManager } from './charging.js'; import type { ConfigurationOptions } from './configuration.js'; import { Configuration } from './configuration.js'; +import { ApifyStorageClient } from './apify_storage_client.js'; import { KeyValueStore } from './key_value_store.js'; import { PlatformEventManager } from './platform_event_manager.js'; import type { ProxyConfigurationOptions } from './proxy_configuration.js'; @@ -515,7 +516,9 @@ export class Actor { if (this.isAtHome()) { // availableMemoryRatio and disableBrowserSandbox are now set via // conditional defaults in the Configuration constructor (isAtHome check) - serviceLocator.setStorageClient(this.apifyClient); + serviceLocator.setStorageClient( + new ApifyStorageClient(this.apifyClient), + ); serviceLocator.setEventManager(this.eventManager); } else if (options.storage) { serviceLocator.setStorageClient(options.storage); @@ -1330,7 +1333,7 @@ export class Actor { // eslint-disable-next-line dot-notation queue['initialCount'] = - (await queue.client.get())?.totalRequestCount ?? 0; + (await queue.client.getMetadata())?.totalRequestCount ?? 0; return queue; } @@ -2255,13 +2258,10 @@ export class Actor { id?: string, options: OpenStorageOptions = {}, ) { - const client = options.forceCloud ? this.apifyClient : undefined; - return StorageManager.openStorage( - storageClass, - id, - client, - this.config, - ); + const client = options.forceCloud + ? 
new ApifyStorageClient(this.apifyClient) + : undefined; + return StorageManager.openStorage(storageClass, id, client); } private _ensureActorInit(methodCalled: string) { diff --git a/packages/apify/src/apify_storage_client.ts b/packages/apify/src/apify_storage_client.ts new file mode 100644 index 0000000000..4926452fc4 --- /dev/null +++ b/packages/apify/src/apify_storage_client.ts @@ -0,0 +1,72 @@ +import type { + CreateDatasetClientOptions, + CreateKeyValueStoreClientOptions, + CreateRequestQueueClientOptions, + DatasetClient, + KeyValueStoreClient, + RequestQueueClient, + StorageClient, +} from '@crawlee/types'; +import type { ApifyClient } from 'apify-client'; + +/** + * Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`, + * `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's + * `StorageClient` interface (async factory methods accepting either an `id` + * or a `name`). + * + * When only a `name` is provided, we resolve it to a concrete ID via the + * collection client's `getOrCreate(name)` — matching the behaviour the SDK + * relied on in v3 when storages were opened by name. + */ +export class ApifyStorageClient implements StorageClient { + constructor(private readonly client: ApifyClient) {} + + async createDatasetClient( + options?: CreateDatasetClientOptions, + ): Promise { + const id = + options?.id ?? + (options?.name + ? (await this.client.datasets().getOrCreate(options.name)).id + : undefined); + // apify-client's resource clients overlap with `@crawlee/types`' shapes + // but don't yet implement the v4-added members (`getMetadata`, + // `getRecordPublicUrl`). Cast through for now; a follow-up should + // bring apify-client into structural alignment. + return this.client.dataset(id ?? '') as unknown as DatasetClient; + } + + async createKeyValueStoreClient( + options?: CreateKeyValueStoreClientOptions, + ): Promise { + const id = + options?.id ?? + (options?.name + ? 
( + await this.client + .keyValueStores() + .getOrCreate(options.name) + ).id + : undefined); + return this.client.keyValueStore(id ?? '') as unknown as KeyValueStoreClient; + } + + async createRequestQueueClient( + options?: CreateRequestQueueClientOptions, + ): Promise { + const id = + options?.id ?? + (options?.name + ? ( + await this.client + .requestQueues() + .getOrCreate(options.name) + ).id + : undefined); + return this.client.requestQueue( + id ?? '', + options?.clientKey ? { clientKey: options.clientKey } : undefined, + ) as unknown as RequestQueueClient; + } +} diff --git a/packages/apify/src/key_value_store.ts b/packages/apify/src/key_value_store.ts index a26a12e8f1..13ea1fd0a7 100644 --- a/packages/apify/src/key_value_store.ts +++ b/packages/apify/src/key_value_store.ts @@ -1,12 +1,18 @@ import type { StorageManagerOptions } from '@crawlee/core'; import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core'; +import type { KeyValueStoreInfo } from '@crawlee/types'; import { createHmacSignature } from '@apify/utilities'; import type { Configuration } from './configuration.js'; -// @ts-ignore newer crawlee versions already declare this method in core -const { getPublicUrl } = CoreKeyValueStore.prototype; +// crawlee v4 dropped the `storageObject` cache from `KeyValueStore`, so the +// per-store `urlSigningSecretKey` (which is part of the platform's metadata +// response but not declared on `@crawlee/types`' `KeyValueStoreInfo`) has to +// be fetched on demand and accessed through a structural-typed augmentation. +type ApifyKeyValueStoreInfo = KeyValueStoreInfo & { + urlSigningSecretKey?: string; +}; /** * @inheritDoc @@ -15,24 +21,33 @@ export class KeyValueStore extends CoreKeyValueStore { /** * Returns a URL for the given key that may be used to publicly * access the value in the remote key-value store. 
+ * + * On the Apify platform the URL is signed with the store's + * `urlSigningSecretKey` so that anyone with the URL can read the record + * without authentication. Locally we delegate to crawlee's default + * implementation (which produces a `file://` URL or returns `undefined`). */ - override getPublicUrl(key: string): string { + override async getPublicUrl(key: string): Promise { const config = this.config as Configuration; - if (!config.isAtHome && getPublicUrl) { - return getPublicUrl.call(this, key); + if (!config.isAtHome) { + return super.getPublicUrl(key); } const publicUrl = new URL( `${config.apiPublicBaseUrl}/v2/key-value-stores/${this.id}/records/${key}`, ); - if (this.storageObject?.urlSigningSecretKey) { + // `client` is `private` on `CoreKeyValueStore`; bypass the visibility + // check to fetch the per-store secret. There is no public crawlee API + // surface for this yet — track upstream exposure as a follow-up. + const metadata = (await ( + this as unknown as { client: { getMetadata(): Promise } } + ).client.getMetadata()) as ApifyKeyValueStoreInfo; + + if (metadata?.urlSigningSecretKey) { publicUrl.searchParams.append( 'signature', - createHmacSignature( - this.storageObject.urlSigningSecretKey as string, - key, - ), + createHmacSignature(metadata.urlSigningSecretKey, key), ); } @@ -49,6 +64,3 @@ export class KeyValueStore extends CoreKeyValueStore { return super.open(storeIdOrName, options) as unknown as KeyValueStore; } } - -// @ts-ignore newer crawlee versions already declare this method in core -CoreKeyValueStore.prototype.getPublicUrl = KeyValueStore.prototype.getPublicUrl; From 087e1173a77e9e2ba4505b8fd6247e677a7e6025 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 18:18:49 +0200 Subject: [PATCH 02/12] test: migrate MemoryStorageEmulator to crawlee v4 service locator Replace the removed `StorageManager.clearCache()` and `Configuration.useStorageClient()` with `serviceLocator.reset()` plus 
`serviceLocator.setStorageClient()`. --- test/MemoryStorageEmulator.ts | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index c5d4511236..c0bc20bafc 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -1,9 +1,8 @@ import { rm } from 'node:fs/promises'; import { resolve } from 'node:path'; -import { StorageManager } from '@crawlee/core'; +import { serviceLocator } from '@crawlee/core'; import { MemoryStorage } from '@crawlee/memory-storage'; -import { Configuration } from 'apify'; import { ensureDir } from 'fs-extra'; import log from '@apify/log'; @@ -20,7 +19,10 @@ export class MemoryStorageEmulator { protected localStorageDirectories: string[] = []; async init(dirName = cryptoRandomObjectId(10)) { - StorageManager.clearCache(); + // crawlee v4 dropped `StorageManager.clearCache()` and + // `Configuration.useStorageClient()`; reset the service locator + // and re-register the in-memory client instead. 
+ serviceLocator.reset(); const localStorageDir = resolve(LOCAL_EMULATION_DIR, dirName); this.localStorageDirectories.push(localStorageDir); await ensureDir(localStorageDir); @@ -28,7 +30,7 @@ export class MemoryStorageEmulator { const storage = new MemoryStorage({ localDataDirectory: localStorageDir, }); - Configuration.getGlobalConfig().useStorageClient(storage); + serviceLocator.setStorageClient(storage); log.debug( `Initialized emulated memory storage in folder ${localStorageDir}`, ); @@ -40,7 +42,7 @@ export class MemoryStorageEmulator { }); await Promise.all(promises); - StorageManager.clearCache(); + serviceLocator.reset(); } static toString() { From bbce3fccce2be83df88fb8725e22d1989213853e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 18:34:49 +0200 Subject: [PATCH 03/12] chore: fix import sort in actor.ts after ApifyStorageClient addition --- packages/apify/src/actor.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/apify/src/actor.ts b/packages/apify/src/actor.ts index 05bde349de..9a35c4f55a 100644 --- a/packages/apify/src/actor.ts +++ b/packages/apify/src/actor.ts @@ -43,11 +43,11 @@ import { decryptInputSecrets } from '@apify/input_secrets'; import log from '@apify/log'; import { addTimeoutToPromise } from '@apify/timeout'; +import { ApifyStorageClient } from './apify_storage_client.js'; import type { ChargeOptions, ChargeResult } from './charging.js'; import { ChargingManager } from './charging.js'; import type { ConfigurationOptions } from './configuration.js'; import { Configuration } from './configuration.js'; -import { ApifyStorageClient } from './apify_storage_client.js'; import { KeyValueStore } from './key_value_store.js'; import { PlatformEventManager } from './platform_event_manager.js'; import type { ProxyConfigurationOptions } from './proxy_configuration.js'; From 412f46b97ca52a03937de58972442fe1a2ebb5ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: 
Thu, 30 Apr 2026 19:12:01 +0200 Subject: [PATCH 04/12] chore: prettier --- packages/apify/src/apify_storage_client.ts | 18 +++++++----------- packages/apify/src/key_value_store.ts | 4 +++- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/packages/apify/src/apify_storage_client.ts b/packages/apify/src/apify_storage_client.ts index 4926452fc4..6301411010 100644 --- a/packages/apify/src/apify_storage_client.ts +++ b/packages/apify/src/apify_storage_client.ts @@ -43,13 +43,12 @@ export class ApifyStorageClient implements StorageClient { const id = options?.id ?? (options?.name - ? ( - await this.client - .keyValueStores() - .getOrCreate(options.name) - ).id + ? (await this.client.keyValueStores().getOrCreate(options.name)) + .id : undefined); - return this.client.keyValueStore(id ?? '') as unknown as KeyValueStoreClient; + return this.client.keyValueStore( + id ?? '', + ) as unknown as KeyValueStoreClient; } async createRequestQueueClient( @@ -58,11 +57,8 @@ export class ApifyStorageClient implements StorageClient { const id = options?.id ?? (options?.name - ? ( - await this.client - .requestQueues() - .getOrCreate(options.name) - ).id + ? (await this.client.requestQueues().getOrCreate(options.name)) + .id : undefined); return this.client.requestQueue( id ?? '', diff --git a/packages/apify/src/key_value_store.ts b/packages/apify/src/key_value_store.ts index 13ea1fd0a7..356180bdcd 100644 --- a/packages/apify/src/key_value_store.ts +++ b/packages/apify/src/key_value_store.ts @@ -41,7 +41,9 @@ export class KeyValueStore extends CoreKeyValueStore { // check to fetch the per-store secret. There is no public crawlee API // surface for this yet — track upstream exposure as a follow-up. 
const metadata = (await ( - this as unknown as { client: { getMetadata(): Promise } } + this as unknown as { + client: { getMetadata(): Promise }; + } ).client.getMetadata()) as ApifyKeyValueStoreInfo; if (metadata?.urlSigningSecretKey) { From b6689cc6d932a322a06313e90478cf7b436308ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:14:00 +0200 Subject: [PATCH 05/12] test: also reset SDK Configuration.globalConfig and Actor singleton on emulator init/destroy --- test/MemoryStorageEmulator.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index c0bc20bafc..cab9eab144 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -3,11 +3,25 @@ import { resolve } from 'node:path'; import { serviceLocator } from '@crawlee/core'; import { MemoryStorage } from '@crawlee/memory-storage'; +import { Actor, Configuration } from 'apify'; import { ensureDir } from 'fs-extra'; import log from '@apify/log'; import { cryptoRandomObjectId } from '@apify/utilities'; +function resetGlobalState() { + serviceLocator.reset(); + // The SDK's `Configuration` keeps its own static singleton (separate + // from crawlee's serviceLocator), and `Actor` caches a default + // instance with the resolved config. Both must be cleared so each + // test starts with a fresh config that reads the env vars it just set. 
+ ( + Configuration as unknown as { globalConfig?: Configuration } + ).globalConfig = undefined; + // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field + (Actor as unknown as { _instance?: Actor })._instance = undefined; +} + const LOCAL_EMULATION_DIR = resolve( __dirname, '..', @@ -22,7 +36,7 @@ export class MemoryStorageEmulator { // crawlee v4 dropped `StorageManager.clearCache()` and // `Configuration.useStorageClient()`; reset the service locator // and re-register the in-memory client instead. - serviceLocator.reset(); + resetGlobalState(); const localStorageDir = resolve(LOCAL_EMULATION_DIR, dirName); this.localStorageDirectories.push(localStorageDir); await ensureDir(localStorageDir); @@ -42,7 +56,7 @@ export class MemoryStorageEmulator { }); await Promise.all(promises); - serviceLocator.reset(); + resetGlobalState(); } static toString() { From 037eb7d64177c4d7468e974817b150a5c77739da Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:17:21 +0200 Subject: [PATCH 06/12] test: align actor.test.ts mocks/expectations with v4 StorageClient adapter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - `openRequestQueue should open storage`: mock client uses `getMetadata()` (the v3 `get()` was dropped on RequestQueueClient). - Both Storage API tests assert that StorageManager.openStorage is called with an ApifyStorageClient (matched structurally) instead of the raw ApifyClient — the SDK now wraps it for crawlee v4. 
--- test/apify/actor.test.ts | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/test/apify/actor.test.ts b/test/apify/actor.test.ts index 3de4ce9ba8..9d6086b478 100644 --- a/test/apify/actor.test.ts +++ b/test/apify/actor.test.ts @@ -764,13 +764,26 @@ describe('Actor', () => { 'openStorage', ); + // crawlee v4's `RequestQueueClient` exposes metadata via + // `getMetadata()` (the v3 `get()` was dropped). const mockRQ = { - client: { get: () => ({ totalRequestCount: 10 }) }, + client: { + getMetadata: async () => ({ totalRequestCount: 10 }), + }, }; openStorageSpy.mockImplementationOnce(async () => mockRQ); const queue = await sdk.openRequestQueue(queueId, options); - expect(openStorageSpy).toBeCalledWith(queueId, sdk.apifyClient); + // The SDK now wraps `apifyClient` in an `ApifyStorageClient` + // adapter to satisfy crawlee v4's `StorageClient` interface. + expect(openStorageSpy).toBeCalledWith( + queueId, + expect.objectContaining({ + createDatasetClient: expect.any(Function), + createKeyValueStoreClient: expect.any(Function), + createRequestQueueClient: expect.any(Function), + }), + ); expect(openStorageSpy).toBeCalledTimes(1); // @ts-expect-error private prop @@ -789,7 +802,11 @@ describe('Actor', () => { expect(mockOpenStorage).toBeCalledTimes(1); expect(mockOpenStorage).toBeCalledWith( datasetName, - sdk.apifyClient, + expect.objectContaining({ + createDatasetClient: expect.any(Function), + createKeyValueStoreClient: expect.any(Function), + createRequestQueueClient: expect.any(Function), + }), ); }); }); From b8d009847c77c49a7199344596bdcd08c264481a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 19:50:44 +0200 Subject: [PATCH 07/12] test: clear Configuration AsyncLocalStorage between tests (Node 22 fix) Actor.init() calls Configuration.storage.enterWith(this.config), which sticks the resolved config onto the current async context and persists across tests on Node 22 (but not Node 
24+). The cached value short-circuits Configuration.getGlobalConfig() so subsequent tests never see the env vars they just set. Reset the AsyncLocalStorage value alongside the other singletons in the test emulator so addWebhook (and friends) see ACTOR_RUN_ID etc. --- test/MemoryStorageEmulator.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index cab9eab144..7851667ea7 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -20,6 +20,16 @@ function resetGlobalState() { ).globalConfig = undefined; // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field (Actor as unknown as { _instance?: Actor })._instance = undefined; + // `Actor.init()` calls `Configuration.storage.enterWith(this.config)`, + // which sticks the resolved config onto the current async context and + // persists across tests on some Node versions (observed on Node 22 + // but not Node 24+). Clear the AsyncLocalStorage value so the next + // `Configuration.getGlobalConfig()` falls through to a fresh build. + if (Configuration.storage?.getStore()) { + ( + Configuration.storage as unknown as { enterWith(v: unknown): void } + ).enterWith(undefined); + } } const LOCAL_EMULATION_DIR = resolve( From 7e4ad7d07f57b49df6e9d2a93af0f50df19acb3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 30 Apr 2026 20:55:22 +0200 Subject: [PATCH 08/12] test: replace Configuration.storage with fresh AsyncLocalStorage on reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `Actor.init()` calls `Configuration.storage.enterWith(this.config)`, which sets the AsyncLocalStorage value on whichever async context the test runner happened to be on. 
`enterWith(undefined)` from a child async branch (vitest's beforeEach) doesn't unwind that — on Node 22 the test body re-enters a sibling context where the original `enterWith` is still in effect, so `getStore()` still returns the stale Configuration even after our reset. Swapping the entire `AsyncLocalStorage` instance for a fresh one guarantees `getStore()` returns `undefined` for every async branch that follows, fixing the addWebhook test failures on Node 22. --- test/MemoryStorageEmulator.ts | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index 7851667ea7..b061ea2473 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -1,3 +1,4 @@ +import { AsyncLocalStorage } from 'node:async_hooks'; import { rm } from 'node:fs/promises'; import { resolve } from 'node:path'; @@ -21,15 +22,16 @@ function resetGlobalState() { // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field (Actor as unknown as { _instance?: Actor })._instance = undefined; // `Actor.init()` calls `Configuration.storage.enterWith(this.config)`, - // which sticks the resolved config onto the current async context and - // persists across tests on some Node versions (observed on Node 22 - // but not Node 24+). Clear the AsyncLocalStorage value so the next - // `Configuration.getGlobalConfig()` falls through to a fresh build. - if (Configuration.storage?.getStore()) { - ( - Configuration.storage as unknown as { enterWith(v: unknown): void } - ).enterWith(undefined); - } + // which sticks the resolved config onto the *outer* async context + // (vitest's test runner). `enterWith(undefined)` from a child context + // (this beforeEach) doesn't propagate back up, so on Node 22 the next + // test still sees the stale store. Replace the entire AsyncLocalStorage + // instance to guarantee `getStore()` returns `undefined` everywhere. 
+ ( + Configuration as unknown as { + storage: AsyncLocalStorage; + } + ).storage = new AsyncLocalStorage(); } const LOCAL_EMULATION_DIR = resolve( From 7d50a7423eb59dfc04855b374f29234d29a875c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Tue, 12 May 2026 16:27:35 +0200 Subject: [PATCH 09/12] test: use Actor.resetGlobalState() in MemoryStorageEmulator instead of inline boilerplate --- test/MemoryStorageEmulator.ts | 31 +++---------------------------- 1 file changed, 3 insertions(+), 28 deletions(-) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index b061ea2473..dae4bedf00 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -1,39 +1,14 @@ -import { AsyncLocalStorage } from 'node:async_hooks'; import { rm } from 'node:fs/promises'; import { resolve } from 'node:path'; import { serviceLocator } from '@crawlee/core'; import { MemoryStorage } from '@crawlee/memory-storage'; -import { Actor, Configuration } from 'apify'; +import { Actor } from 'apify'; import { ensureDir } from 'fs-extra'; import log from '@apify/log'; import { cryptoRandomObjectId } from '@apify/utilities'; -function resetGlobalState() { - serviceLocator.reset(); - // The SDK's `Configuration` keeps its own static singleton (separate - // from crawlee's serviceLocator), and `Actor` caches a default - // instance with the resolved config. Both must be cleared so each - // test starts with a fresh config that reads the env vars it just set. - ( - Configuration as unknown as { globalConfig?: Configuration } - ).globalConfig = undefined; - // eslint-disable-next-line no-underscore-dangle -- `_instance` is the upstream Actor singleton field - (Actor as unknown as { _instance?: Actor })._instance = undefined; - // `Actor.init()` calls `Configuration.storage.enterWith(this.config)`, - // which sticks the resolved config onto the *outer* async context - // (vitest's test runner). 
`enterWith(undefined)` from a child context - // (this beforeEach) doesn't propagate back up, so on Node 22 the next - // test still sees the stale store. Replace the entire AsyncLocalStorage - // instance to guarantee `getStore()` returns `undefined` everywhere. - ( - Configuration as unknown as { - storage: AsyncLocalStorage; - } - ).storage = new AsyncLocalStorage(); -} - const LOCAL_EMULATION_DIR = resolve( __dirname, '..', @@ -48,7 +23,7 @@ export class MemoryStorageEmulator { // crawlee v4 dropped `StorageManager.clearCache()` and // `Configuration.useStorageClient()`; reset the service locator // and re-register the in-memory client instead. - resetGlobalState(); + Actor.resetGlobalState(); const localStorageDir = resolve(LOCAL_EMULATION_DIR, dirName); this.localStorageDirectories.push(localStorageDir); await ensureDir(localStorageDir); @@ -68,7 +43,7 @@ export class MemoryStorageEmulator { }); await Promise.all(promises); - resetGlobalState(); + Actor.resetGlobalState(); } static toString() { From ae0cf99e07b136a7c3650408370248cc7b32153b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Tue, 12 May 2026 17:04:30 +0200 Subject: [PATCH 10/12] fix(storage): adapt to crawlee v4 beta.56 storage rename MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit beta.56 (apify/crawlee#3584) renamed `StorageManager` → `StorageInstanceManager` and reshaped the public storage open path. The static `StorageManager.openStorage(cls, id, client)` helper is gone; each storage class now exposes `static open(id, options?)` with a `storageClient` option for routing through a custom backend. - `actor.ts`: `_openStorage` now calls `storageClass.open(id, { storageClient })` instead of `StorageManager.openStorage(...)`. `StorageOpenOptions` replaces `StorageManagerOptions`. - `key_value_store.ts`: import `StorageOpenOptions` for the `open()` override signature. 
- `actor.test.ts`: the `openDataset` / `openRequestQueue` `forceCloud` tests now spy on the storage class's own `open()` (no more `StorageManager.prototype`), and assert the `storageClient` lives one level deeper in the options object. Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/apify/src/actor.ts | 10 +++--- packages/apify/src/key_value_store.ts | 4 +-- test/apify/actor.test.ts | 44 +++++++++++++-------------- 3 files changed, 29 insertions(+), 29 deletions(-) diff --git a/packages/apify/src/actor.ts b/packages/apify/src/actor.ts index 9a35c4f55a..2e07534600 100644 --- a/packages/apify/src/actor.ts +++ b/packages/apify/src/actor.ts @@ -5,6 +5,7 @@ import type { EventTypeName, IStorage, RecordOptions, + StorageOpenOptions, UseStateOptions, } from '@crawlee/core'; import { @@ -13,7 +14,6 @@ import { purgeDefaultStorages, RequestQueue, serviceLocator, - StorageManager, } from '@crawlee/core'; import type { Awaitable, @@ -2254,14 +2254,16 @@ export class Actor { } private async _openStorage( - storageClass: Constructor, + storageClass: Constructor & { + open(id?: string | null, options?: StorageOpenOptions): Promise; + }, id?: string, options: OpenStorageOptions = {}, ) { - const client = options.forceCloud + const storageClient = options.forceCloud ? new ApifyStorageClient(this.apifyClient) : undefined; - return StorageManager.openStorage(storageClass, id, client); + return storageClass.open(id ?? 
null, { storageClient }); } private _ensureActorInit(methodCalled: string) { diff --git a/packages/apify/src/key_value_store.ts b/packages/apify/src/key_value_store.ts index 356180bdcd..d186a12e06 100644 --- a/packages/apify/src/key_value_store.ts +++ b/packages/apify/src/key_value_store.ts @@ -1,4 +1,4 @@ -import type { StorageManagerOptions } from '@crawlee/core'; +import type { StorageOpenOptions } from '@crawlee/core'; import { KeyValueStore as CoreKeyValueStore } from '@crawlee/core'; import type { KeyValueStoreInfo } from '@crawlee/types'; @@ -61,7 +61,7 @@ export class KeyValueStore extends CoreKeyValueStore { */ static override async open( storeIdOrName?: string | null, - options: StorageManagerOptions = {}, + options: StorageOpenOptions = {}, ): Promise { return super.open(storeIdOrName, options) as unknown as KeyValueStore; } diff --git a/test/apify/actor.test.ts b/test/apify/actor.test.ts index 9d6086b478..2f0736ee65 100644 --- a/test/apify/actor.test.ts +++ b/test/apify/actor.test.ts @@ -1,6 +1,6 @@ import { createPublicKey } from 'node:crypto'; -import { EventType, serviceLocator, StorageManager } from '@crawlee/core'; +import { EventType, RequestQueue, serviceLocator } from '@crawlee/core'; import { sleep } from '@crawlee/utils'; import type { ApifyEnv } from 'apify'; import { @@ -759,10 +759,7 @@ describe('Actor', () => { test('openRequestQueue should open storage', async () => { const queueId = 'abc'; const options = { forceCloud: true }; - const openStorageSpy = vitest.spyOn( - StorageManager.prototype, - 'openStorage', - ); + const openSpy = vitest.spyOn(RequestQueue, 'open'); // crawlee v4's `RequestQueueClient` exposes metadata via // `getMetadata()` (the v3 `get()` was dropped). 
@@ -772,19 +769,21 @@ describe('Actor', () => { }, }; - openStorageSpy.mockImplementationOnce(async () => mockRQ); + openSpy.mockImplementationOnce(async () => mockRQ as any); const queue = await sdk.openRequestQueue(queueId, options); - // The SDK now wraps `apifyClient` in an `ApifyStorageClient` - // adapter to satisfy crawlee v4's `StorageClient` interface. - expect(openStorageSpy).toBeCalledWith( + // `forceCloud: true` routes through an `ApifyStorageClient` + // adapter that satisfies crawlee v4's `StorageClient` interface. + expect(openSpy).toBeCalledWith( queueId, expect.objectContaining({ - createDatasetClient: expect.any(Function), - createKeyValueStoreClient: expect.any(Function), - createRequestQueueClient: expect.any(Function), + storageClient: expect.objectContaining({ + createDatasetClient: expect.any(Function), + createKeyValueStoreClient: expect.any(Function), + createRequestQueueClient: expect.any(Function), + }), }), ); - expect(openStorageSpy).toBeCalledTimes(1); + expect(openSpy).toBeCalledTimes(1); // @ts-expect-error private prop expect(queue.initialCount).toBe(10); @@ -793,19 +792,18 @@ describe('Actor', () => { test('openDataset should open storage', async () => { const datasetName = 'abc'; const options = { forceCloud: true }; - const mockOpenStorage = vitest.spyOn( - StorageManager.prototype, - 'openStorage', - ); - mockOpenStorage.mockResolvedValueOnce(vitest.fn()); + const openSpy = vitest.spyOn(Dataset, 'open'); + openSpy.mockResolvedValueOnce(vitest.fn() as any); const ds = await sdk.openDataset(datasetName, options); - expect(mockOpenStorage).toBeCalledTimes(1); - expect(mockOpenStorage).toBeCalledWith( + expect(openSpy).toBeCalledTimes(1); + expect(openSpy).toBeCalledWith( datasetName, expect.objectContaining({ - createDatasetClient: expect.any(Function), - createKeyValueStoreClient: expect.any(Function), - createRequestQueueClient: expect.any(Function), + storageClient: expect.objectContaining({ + createDatasetClient: 
expect.any(Function), + createKeyValueStoreClient: expect.any(Function), + createRequestQueueClient: expect.any(Function), + }), }), ); }); From 70ea49c9813bced563d4e550edf982c90bf05759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Wed, 13 May 2026 16:46:23 +0200 Subject: [PATCH 11/12] test: use the resetGlobalState() helper in MemoryStorageEmulator --- test/MemoryStorageEmulator.ts | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/MemoryStorageEmulator.ts b/test/MemoryStorageEmulator.ts index dae4bedf00..bcc871c4f3 100644 --- a/test/MemoryStorageEmulator.ts +++ b/test/MemoryStorageEmulator.ts @@ -3,12 +3,13 @@ import { resolve } from 'node:path'; import { serviceLocator } from '@crawlee/core'; import { MemoryStorage } from '@crawlee/memory-storage'; -import { Actor } from 'apify'; import { ensureDir } from 'fs-extra'; import log from '@apify/log'; import { cryptoRandomObjectId } from '@apify/utilities'; +import { resetGlobalState } from './resetGlobalState.js'; + const LOCAL_EMULATION_DIR = resolve( __dirname, '..', @@ -21,9 +22,9 @@ export class MemoryStorageEmulator { async init(dirName = cryptoRandomObjectId(10)) { // crawlee v4 dropped `StorageManager.clearCache()` and - // `Configuration.useStorageClient()`; reset the service locator - // and re-register the in-memory client instead. - Actor.resetGlobalState(); + // `Configuration.useStorageClient()`; reset the global state and + // re-register the in-memory client instead. 
+ resetGlobalState(); const localStorageDir = resolve(LOCAL_EMULATION_DIR, dirName); this.localStorageDirectories.push(localStorageDir); await ensureDir(localStorageDir); @@ -43,7 +44,7 @@ export class MemoryStorageEmulator { }); await Promise.all(promises); - Actor.resetGlobalState(); + resetGlobalState(); } static toString() { From 0352234e4a3e5b6cc23d03e247423621df9da981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Ad=C3=A1mek?= Date: Thu, 14 May 2026 15:25:29 +0200 Subject: [PATCH 12/12] fix(storage): implement storageExists on ApifyStorageClient MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit crawlee v4's `resolveStorageIdentifier` (called by `Dataset.open()` / `KeyValueStore.open()` / `RequestQueue.open()` with a string argument) asks the underlying `StorageClient` whether the string is a known id via `storageExists(id, type)`; if yes, it's treated as `{ id }`, otherwise as `{ name }`. `ApifyStorageClient` didn't implement the method, so every string argument took the name branch — and `createDatasetClient({ name })` calls `client.datasets().getOrCreate(name)`, which looks storages up by *name* only and therefore CREATES a new dataset whose *name* equals the user-supplied id whenever no dataset with that name already exists on the platform. The user's intended dataset was never opened. Add the method. The Apify platform's `GET /v2/{kind}/{idOrName}` endpoint matches by either id or name, so the implementation calls `.get()` and confirms `info.id === id` before reporting existence — otherwise it falls through to the name branch as crawlee expects. Also refactor the three `create*Client` methods to share `resolveId`, `resourceClient`, and `collectionClient` helpers so the storage-type switching lives in one place.
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/apify/src/apify_storage_client.ts | 74 ++++++++++++++-------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/packages/apify/src/apify_storage_client.ts b/packages/apify/src/apify_storage_client.ts index 6301411010..fb1b65df7a 100644 --- a/packages/apify/src/apify_storage_client.ts +++ b/packages/apify/src/apify_storage_client.ts @@ -9,60 +9,84 @@ import type { } from '@crawlee/types'; import type { ApifyClient } from 'apify-client'; +type StorageType = 'Dataset' | 'KeyValueStore' | 'RequestQueue'; + /** * Bridges `apify-client`'s synchronous resource accessors (`dataset(id)`, * `keyValueStore(id)`, `requestQueue(id, options?)`) to crawlee v4's * `StorageClient` interface (async factory methods accepting either an `id` * or a `name`). * - * When only a `name` is provided, we resolve it to a concrete ID via the - * collection client's `getOrCreate(name)` — matching the behaviour the SDK - * relied on in v3 when storages were opened by name. + * `storageExists()` is implemented so that `Dataset.open(idOrName)` and friends + * resolve a string argument to an id first (when one with that id exists on + * the platform) and fall back to a name otherwise — without this, crawlee's + * `resolveStorageIdentifier` would treat every string as a name and the SDK + * would silently create a brand-new storage whose name equals the passed-in id. + * + * When only a `name` is provided to a `create*Client` method, it is resolved + * to a concrete id via `getOrCreate(name)` — same behaviour the SDK relied on + * in v3. */ export class ApifyStorageClient implements StorageClient { constructor(private readonly client: ApifyClient) {} + async storageExists(id: string, type: StorageType): Promise { + // Apify's `GET /v2/{kind}/{idOrName}` endpoint matches by either id or + // name. Confirm it was an *id* match — otherwise crawlee should fall + // through to the `{ name }` branch. 
+ const info = await this.resourceClient(id, type).get(); + return info?.id === id; + } + async createDatasetClient( options?: CreateDatasetClientOptions, ): Promise { - const id = - options?.id ?? - (options?.name - ? (await this.client.datasets().getOrCreate(options.name)).id - : undefined); + const id = await this.resolveId(options, 'Dataset'); // apify-client's resource clients overlap with `@crawlee/types`' shapes // but don't yet implement the v4-added members (`getMetadata`, // `getRecordPublicUrl`). Cast through for now; a follow-up should // bring apify-client into structural alignment. - return this.client.dataset(id ?? '') as unknown as DatasetClient; + return this.client.dataset(id) as unknown as DatasetClient; } async createKeyValueStoreClient( options?: CreateKeyValueStoreClientOptions, ): Promise { - const id = - options?.id ?? - (options?.name - ? (await this.client.keyValueStores().getOrCreate(options.name)) - .id - : undefined); - return this.client.keyValueStore( - id ?? '', - ) as unknown as KeyValueStoreClient; + const id = await this.resolveId(options, 'KeyValueStore'); + return this.client.keyValueStore(id) as unknown as KeyValueStoreClient; } async createRequestQueueClient( options?: CreateRequestQueueClientOptions, ): Promise { - const id = - options?.id ?? - (options?.name - ? (await this.client.requestQueues().getOrCreate(options.name)) - .id - : undefined); + const id = await this.resolveId(options, 'RequestQueue'); return this.client.requestQueue( - id ?? '', + id, options?.clientKey ? 
{ clientKey: options.clientKey } : undefined, ) as unknown as RequestQueueClient; } + + private async resolveId( + options: { id?: string; name?: string } | undefined, + type: StorageType, + ): Promise { + if (options?.id) return options.id; + if (options?.name) { + return (await this.collectionClient(type).getOrCreate(options.name)) + .id; + } + return ''; + } + + private resourceClient(id: string, type: StorageType) { + if (type === 'Dataset') return this.client.dataset(id); + if (type === 'KeyValueStore') return this.client.keyValueStore(id); + return this.client.requestQueue(id); + } + + private collectionClient(type: StorageType) { + if (type === 'Dataset') return this.client.datasets(); + if (type === 'KeyValueStore') return this.client.keyValueStores(); + return this.client.requestQueues(); + } }