From 2bb6b9f993e6f634198d09676169436f80e823d8 Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 13:41:43 -0600 Subject: [PATCH 1/9] Adding requested improvements --- extension.json | 11 +++++++++- includes/Hooks.php | 51 +++++++++++++++++++++++++++++++++++++--------- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/extension.json b/extension.json index b09e568..86e380e 100644 --- a/extension.json +++ b/extension.json @@ -1,7 +1,7 @@ { "name": "CrawlerProtection", "author": "MyWikis LLC", - "version": "1.1.0", + "version": "1.2.0", "description": "Suite of protective measures to protect wikis from crawlers.", "type": "hook", "requires": { @@ -28,8 +28,17 @@ "whatlinkshere" ] }, + "CrawlerProtectionRawDenial": { + "value": false + }, "CrawlerProtectionUse418": { "value": false + }, + "CrawlerProtectionRawDenialHeader": { + "value": "HTTP/1.0 403 Forbidden" + }, + "CrawlerProtectionRawDenialText": { + "value": "403 Forbidden" } }, "license-name": "MIT", diff --git a/includes/Hooks.php b/includes/Hooks.php index a2d9bba..3376f45 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -65,6 +65,8 @@ public function onMediaWikiPerformAction( $request, $mediaWiki ) { + $config = MediaWikiServices::getInstance()->getMainConfig(); + $type = $request->getVal( 'type' ); $action = $request->getVal( 'action' ); $diffId = (int)$request->getVal( 'diff' ); @@ -80,7 +82,6 @@ public function onMediaWikiPerformAction( ) ) { $this->denyAccess( $output ); - return false; } return true; @@ -114,17 +115,35 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { $name = strtolower( $special->getName() ); if ( in_array( $name, $normalizedProtectedPages, true ) ) { - $out = $special->getContext()->getOutput(); - if ( $denyFast ) { - $this->denyAccessWith418(); - } - $this->denyAccess( $out ); + $outputPage = $special->getContext()->getOutput(); + $this->denyAccess( $outputPage ); return false; } return true; } + /** + * Helper: Triage denial method based on config + */ + protected function denyAccess( OutputPage $output ): void { + $config = MediaWikiServices::getInstance()->getMainConfig(); + + $rawDenial = $config->get( 'CrawlerProtectionRawDenial' ); + $denyWith418 = $config->get( 'CrawlerProtectionUse418' ); + + if ( $denyWith418 ) { + $this->denyAccessWith418(); + } elseif ( $rawDenial ) { + $this->denyAccessRaw( + $config->get( 'CrawlerProtectionRawDenialHeader' ), + $config->get( 'CrawlerProtectionRawDenialText' ) + ); + } else { + $this->denyAccessPretty( $output ); + } + } + /** * Helper: output 418 Teapot and halt the processing immediately * @@ -132,17 +151,29 @@ public function onSpecialPageBeforeExecute( $special, $subPage ) { * @suppress PhanPluginNeverReturnMethod */ protected function denyAccessWith418() { - header( 'HTTP/1.0 I\'m a teapot' ); - die( 'I\'m a teapot' ); + $this->denyAccessRaw( 'HTTP/1.0 I\'m a teapot', 'I\'m a teapot' ); + } + + /** + * Helper: output raw HTTP response and halt the processing immediately + * + * @param string $header + * @param string $message + * @return void + */ + protected function denyAccessRaw( string $header, string $message ): void { + $config = MediaWikiServices::getInstance()->getMainConfig(); + header( $header ); + die( $message ); } /** - * Helper: output 403 Access Denied page using i18n messages. + * Helper: output a pretty 403 Access Denied page using i18n messages. * * @param OutputPage $output * @return void */ - protected function denyAccess( $output ): void { + protected function denyAccessPretty( OutputPage $output ): void { $output->setStatusCode( 403 ); $output->addWikiTextAsInterface( wfMessage( 'crawlerprotection-accessdenied-text' )->plain() ); From 308b86c2e07770bef14c0807e03af979c72f5994 Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 14:15:34 -0600 Subject: [PATCH 2/9] Refactor to use dependency injection --- ...wiki-extensions-clean-code.instructions.md | 153 ++++++++++ extension.json | 7 +- includes/CrawlerProtectionService.php | 155 ++++++++++ includes/Hooks.php | 163 +++-------- includes/ResponseFactory.php | 115 ++++++++ includes/ServiceWiring.php | 46 +++ tests/phpunit/namespaced-stubs.php | 79 ++++-- .../unit/CrawlerProtectionServiceTest.php | 266 ++++++++++++++++++ tests/phpunit/unit/HooksTest.php | 236 +++------------- tests/phpunit/unit/ResponseFactoryTest.php | 138 +++++++++ 10 files changed, 1019 insertions(+), 339 deletions(-) create mode 100644 .github/instructions/mediawiki-extensions-clean-code.instructions.md create mode 100644 includes/CrawlerProtectionService.php create mode 100644 includes/ResponseFactory.php create mode 100644 includes/ServiceWiring.php create mode 100644 tests/phpunit/unit/CrawlerProtectionServiceTest.php create mode 100644 tests/phpunit/unit/ResponseFactoryTest.php diff --git a/.github/instructions/mediawiki-extensions-clean-code.instructions.md b/.github/instructions/mediawiki-extensions-clean-code.instructions.md new file mode 100644 index 0000000..e528c11 --- /dev/null +++ b/.github/instructions/mediawiki-extensions-clean-code.instructions.md @@ -0,0 +1,153 @@ +--- +applyTo: "*" +--- + +# MediaWiki Extension Clean Code Best Practices + +## Service container pattern + +Use the service container pattern (based on MediaWikiServices) to define how key objects are constructed and wired together. + +## Dependency injection + +Use this example from PluggableAuth to implement dependency injection. + +Below is an `includes/ServiceWiring.php` file. + +```php +return [ + 'PluggableAuthFactory' => + static function ( MediaWikiServices $services ): PluggableAuthFactory { + return new PluggableAuthFactory( + new ServiceOptions( PluggableAuthFactory::CONSTRUCTOR_OPTIONS, $services->getMainConfig() ), + ExtensionRegistry::getInstance(), + $services->getAuthManager(), + LoggerFactory::getInstance( 'PluggableAuth' ), + $services->getObjectFactory() + ); + }, + 'PluggableAuthService' => + static function ( MediaWikiServices $services ): PluggableAuthService { + return new PluggableAuthService( + new ServiceOptions( PluggableAuthService::CONSTRUCTOR_OPTIONS, $services->getMainConfig() ), + ExtensionRegistry::getInstance(), + $services->getUserFactory(), + $services->get( 'PluggableAuthFactory' ), + $services->get( 'PluggableAuth.GroupProcessorRunner' ), + $services->getPermissionManager(), + $services->getAuthManager(), + LoggerFactory::getInstance( 'PluggableAuth' ), + $services->getUrlUtils() + ); + }, + 'PluggableAuth.GroupProcessorFactory' => + static function ( MediaWikiServices $services ): GroupProcessorFactory { + $factory = new GroupProcessorFactory( + ExtensionRegistry::getInstance()->getAttribute( 'PluggableAuthGroupSyncs' ), + $services->getObjectFactory() + ); + $factory->setLogger( LoggerFactory::getInstance( 'PluggableAuth' ) ); + return $factory; + }, + 'PluggableAuth.GroupProcessorRunner' => + static function ( MediaWikiServices $services ): GroupProcessorRunner { + $factory = new GroupProcessorRunner( + $services->getService( 'PluggableAuth.GroupProcessorFactory' ) + ); + $factory->setLogger( LoggerFactory::getInstance( 'PluggableAuth' ) ); + return $factory; + }, +]; +``` + +The ServiceWiring.php file returns an associative array where: + +- Keys are service names (e.g., 'PluggableAuthFactory', 'PluggableAuth.GroupProcessorRunner') +- Values are factory closures that receive the MediaWikiServices container and return a fully constructed service instance +- When any code in the extension (or elsewhere) calls $services->get('PluggableAuthFactory'), MediaWiki invokes the corresponding closure, injects all dependencies, and returns the object. Services are typically lazily instantiated and cached (singleton per request). + +## Loading LocalSettings.php config values into the extension + +All configs should be loaded with dependency injection. + +## Consuming services + +### 1. Declaratively via extension.json + +#### Hook handlers + +MediaWiki's ObjectFactory reads service names from `extension.json` and automatically injects them as constructor arguments. + +```json +"HookHandlers": { + "main": { + "class": "MediaWiki\\Extension\\PluggableAuth\\PluggableAuthHooks", + "services": [ + "PluggableAuthService", + "UrlUtils" + ] + } +}, +``` + +### 2. Internally between services + +Services reference each other via `$services->get(...)`. + +For example, in the MediaWiki PluggableAuth extension: + +- PluggableAuthService depends on PluggableAuthFactory and PluggableAuth.GroupProcessorRunner +- PluggableAuth.GroupProcessorRunner depends on PluggableAuth.GroupProcessorFactory + +## Testing services + +Dependency injection means code is easily testable. Below is an example of a test file for services. + +```php +class PluggableAuthServiceTest extends MediaWikiIntegrationTestCase { + + /** + * @param array $links + * @param array $expectedLinks + * @param array $options + * @param bool $shouldOverrideDefaultLogout + * @param string $msg + * @return void + * @throws \PHPUnit\Framework\MockObject\Exception + * @covers MediaWiki\Extension\PluggableAuth\PluggableAuthService::modifyLogoutLink + * @dataProvider provideTestModifyLogoutLinkData + */ + public function testModifyLogoutLink( $links, $expectedLinks, $options, $shouldOverrideDefaultLogout, $msg ) { + $serviceOptions = new ServiceOptions( + PluggableAuthService::CONSTRUCTOR_OPTIONS, + $options + ); + $extensionRegistry = $this->createMock( ExtensionRegistry::class ); + $userFactory = $this->createMock( UserFactory::class ); + $pluggableAuthPlugin = $this->createMock( PluggableAuthPlugin::class ); + $pluggableAuthPlugin->method( 'shouldOverrideDefaultLogout' )->willReturn( $shouldOverrideDefaultLogout ); + $pluggableAuthFactory = $this->createMock( PluggableAuthFactory::class ); + $pluggableAuthFactory->method( 'getInstance' )->willReturn( $pluggableAuthPlugin ); + $groupProcessorRunner = $this->createMock( GroupProcessorRunner::class ); + $permissionManager = $this->createMock( PermissionManager::class ); + $authManager = $this->createMock( AuthManager::class ); + $logger = $this->createMock( LoggerInterface::class ); + $service = new PluggableAuthService( + $serviceOptions, + $extensionRegistry, + $userFactory, + $pluggableAuthFactory, + $groupProcessorRunner, + $permissionManager, + $authManager, + $logger, + $this->getServiceContainer()->getUrlUtils() + ); + + $service->modifyLogoutLink( $links ); + + $this->assertEquals( $expectedLinks, $links, $msg ); + } + +} +``` \ No newline at end of file diff --git a/extension.json b/extension.json index 86e380e..c591431 100644 --- a/extension.json +++ b/extension.json @@ -13,7 +13,9 @@ "HookHandlers": { "main": { "class": "MediaWiki\\Extension\\CrawlerProtection\\Hooks", - "services": [] + "services": [ + "CrawlerProtection.CrawlerProtectionService" + ] } }, "Hooks": { @@ -41,6 +43,9 @@ "value": "403 Forbidden" } }, + "ServiceWiringFiles": [ + "includes/ServiceWiring.php" + ], "license-name": "MIT", "Tests": { "phpunit": "tests/phpunit" diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php new file mode 100644 index 0000000..ee77b8b --- /dev/null +++ b/includes/CrawlerProtectionService.php @@ -0,0 +1,155 @@ +assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->options = $options; + $this->responseFactory = $responseFactory; + } + + /** + * Check whether a regular page request should be blocked. + * + * Returns false (= abort further processing) when the request is + * blocked, true otherwise. + * + * @param OutputPage $output + * @param User $user + * @param WebRequest $request + * @return bool + */ + public function checkPerformAction( + OutputPage $output, + User $user, + WebRequest $request + ): bool { + if ( $user->isRegistered() ) { + return true; + } + + $type = $request->getVal( 'type' ); + $action = $request->getVal( 'action' ); + $diffId = (int)$request->getVal( 'diff' ); + $oldId = (int)$request->getVal( 'oldid' ); + + if ( + $type === 'revision' + || $action === 'history' + || $diffId > 0 + || $oldId > 0 + ) { + $this->responseFactory->denyAccess( $output ); + return false; + } + + return true; + } + + /** + * Check whether a special page request should be blocked. + * + * Returns false (= abort further processing) when the request is + * blocked, true otherwise. + * + * @param string $specialPageName The canonical special page name + * @param OutputPage $output + * @param User $user + * @return bool + */ + public function checkSpecialPage( + string $specialPageName, + OutputPage $output, + User $user + ): bool { + if ( $user->isRegistered() ) { + return true; + } + + if ( $this->isProtectedSpecialPage( $specialPageName ) ) { + $this->responseFactory->denyAccess( $output ); + return false; + } + + return true; + } + + /** + * Determine whether the given special page name is in the + * configured list of protected special pages. + * + * @param string $specialPageName + * @return bool + */ + public function isProtectedSpecialPage( string $specialPageName ): bool { + $protectedSpecialPages = $this->options->get( 'CrawlerProtectedSpecialPages' ); + + // Normalize protected special pages: lowercase and strip 'Special:' prefix + $normalizedProtectedPages = array_map( + static function ( string $p ): string { + $lower = strtolower( $p ); + if ( strpos( $lower, strtolower( self::SPECIAL_PAGE_PREFIX ) ) === 0 ) { + return substr( $lower, strlen( self::SPECIAL_PAGE_PREFIX ) ); + } + return $lower; + }, + $protectedSpecialPages + ); + + $name = strtolower( $specialPageName ); + + return in_array( $name, $normalizedProtectedPages, true ); + } +} diff --git a/includes/Hooks.php b/includes/Hooks.php index 3376f45..bf11661 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -1,4 +1,22 @@ crawlerProtectionService = $crawlerProtectionService; + } /** * Block sensitive page views for anonymous users via MediaWikiPerformAction. - * Handles: - * - ?type=revision - * - ?action=history - * - ?diff=1234 - * - ?oldid=1234 - * - * Special pages (e.g. Special:WhatLinksHere) are handled separately. * * @param OutputPage $output * @param Article $article @@ -65,123 +90,25 @@ public function onMediaWikiPerformAction( $request, $mediaWiki ) { - $config = MediaWikiServices::getInstance()->getMainConfig(); - - $type = $request->getVal( 'type' ); - $action = $request->getVal( 'action' ); - $diffId = (int)$request->getVal( 'diff' ); - $oldId = (int)$request->getVal( 'oldid' ); - - if ( - !$user->isRegistered() - && ( - $type === 'revision' - || $action === 'history' - || $diffId > 0 - || $oldId > 0 - ) - ) { - $this->denyAccess( $output ); - } - - return true; + return $this->crawlerProtectionService->checkPerformAction( + $output, + $user, + $request + ); } /** - * Block Special:RecentChangesLinked, Special:WhatLinksHere, and Special:MobileDiff for anonymous users. + * Block protected special pages for anonymous users. * * @param SpecialPage $special * @param string|null $subPage * @return bool False to abort execution */ public function onSpecialPageBeforeExecute( $special, $subPage ) { - $user = $special->getContext()->getUser(); - if ( $user->isRegistered() ) { - // logged-in users: allow - return true; - } - - $config = MediaWikiServices::getInstance()->getMainConfig(); - $protectedSpecialPages = $config->get( 'CrawlerProtectedSpecialPages' ); - $denyFast = $config->get( 'CrawlerProtectionUse418' ); - - // Normalize protected special pages: lowercase and strip 'Special:' prefix - $normalizedProtectedPages = array_map( - fn ( $p ) => ( $p = strtolower( $p ) ) && strpos( $p, strtolower( self::SPECIAL_PAGE_PREFIX ) ) === 0 - ? substr( $p, 8 ) - : $p, - $protectedSpecialPages + return $this->crawlerProtectionService->checkSpecialPage( + $special->getName(), + $special->getContext()->getOutput(), + $special->getContext()->getUser() ); - - $name = strtolower( $special->getName() ); - if ( in_array( $name, $normalizedProtectedPages, true ) ) { - $outputPage = $special->getContext()->getOutput(); - $this->denyAccess( $outputPage ); - return false; - } - - return true; - } - - /** - * Helper: Triage denial method based on config - */ - protected function denyAccess( OutputPage $output ): void { - $config = MediaWikiServices::getInstance()->getMainConfig(); - - $rawDenial = $config->get( 'CrawlerProtectionRawDenial' ); - $denyWith418 = $config->get( 'CrawlerProtectionUse418' ); - - if ( $denyWith418 ) { - $this->denyAccessWith418(); - } elseif ( $rawDenial ) { - $this->denyAccessRaw( - $config->get( 'CrawlerProtectionRawDenialHeader' ), - $config->get( 'CrawlerProtectionRawDenialText' ) - ); - } else { - $this->denyAccessPretty( $output ); - } - } - - /** - * Helper: output 418 Teapot and halt the processing immediately - * - * @return void - * @suppress PhanPluginNeverReturnMethod - */ - protected function denyAccessWith418() { - $this->denyAccessRaw( 'HTTP/1.0 I\'m a teapot', 'I\'m a teapot' ); - } - - /** - * Helper: output raw HTTP response and halt the processing immediately - * - * @param string $header - * @param string $message - * @return void - */ - protected function denyAccessRaw( string $header, string $message ): void { - $config = MediaWikiServices::getInstance()->getMainConfig(); - header( $header ); - die( $message ); - } - - /** - * Helper: output a pretty 403 Access Denied page using i18n messages. - * - * @param OutputPage $output - * @return void - */ - protected function denyAccessPretty( OutputPage $output ): void { - $output->setStatusCode( 403 ); - $output->addWikiTextAsInterface( wfMessage( 'crawlerprotection-accessdenied-text' )->plain() ); - - if ( version_compare( MW_VERSION, '1.41', '<' ) ) { - $output->setPageTitle( wfMessage( 'crawlerprotection-accessdenied-title' ) ); - } else { - // @phan-suppress-next-line PhanUndeclaredMethod Exists in 1.41+ - $output->setPageTitleMsg( wfMessage( 'crawlerprotection-accessdenied-title' ) ); - } } } diff --git a/includes/ResponseFactory.php b/includes/ResponseFactory.php new file mode 100644 index 0000000..b611ad3 --- /dev/null +++ b/includes/ResponseFactory.php @@ -0,0 +1,115 @@ +assertRequiredOptions( self::CONSTRUCTOR_OPTIONS ); + $this->options = $options; + } + + /** + * Deny access using the configured strategy. + * + * @param OutputPage $output Used only for the "pretty" strategy + * @return void + */ + public function denyAccess( OutputPage $output ): void { + if ( $this->options->get( 'CrawlerProtectionUse418' ) ) { + $this->denyAccessWith418(); + } elseif ( $this->options->get( 'CrawlerProtectionRawDenial' ) ) { + $this->denyAccessRaw( + $this->options->get( 'CrawlerProtectionRawDenialHeader' ), + $this->options->get( 'CrawlerProtectionRawDenialText' ) + ); + } else { + $this->denyAccessPretty( $output ); + } + } + + /** + * Output a 418 "I'm a teapot" response and halt. + * + * @return void + * @suppress PhanPluginNeverReturnMethod + */ + protected function denyAccessWith418(): void { + $this->denyAccessRaw( 'HTTP/1.0 I\'m a teapot', 'I\'m a teapot' ); + } + + /** + * Output a raw HTTP response and halt. + * + * @param string $header + * @param string $message + * @return void + */ + protected function denyAccessRaw( string $header, string $message ): void { + header( $header ); + die( $message ); + } + + /** + * Output a pretty 403 Access Denied page using i18n messages. + * + * @param OutputPage $output + * @return void + */ + protected function denyAccessPretty( OutputPage $output ): void { + $output->setStatusCode( 403 ); + $output->addWikiTextAsInterface( + wfMessage( 'crawlerprotection-accessdenied-text' )->plain() + ); + + if ( version_compare( MW_VERSION, '1.41', '<' ) ) { + $output->setPageTitle( wfMessage( 'crawlerprotection-accessdenied-title' ) ); + } else { + // @phan-suppress-next-line PhanUndeclaredMethod Exists in 1.41+ + $output->setPageTitleMsg( wfMessage( 'crawlerprotection-accessdenied-title' ) ); + } + } +} diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php new file mode 100644 index 0000000..4d573ac --- /dev/null +++ b/includes/ServiceWiring.php @@ -0,0 +1,46 @@ + + static function ( MediaWikiServices $services ): ResponseFactory { + return new ResponseFactory( + new ServiceOptions( + ResponseFactory::CONSTRUCTOR_OPTIONS, + $services->getMainConfig() + ) + ); + }, + 'CrawlerProtection.CrawlerProtectionService' => + static function ( MediaWikiServices $services ): CrawlerProtectionService { + return new CrawlerProtectionService( + new ServiceOptions( + CrawlerProtectionService::CONSTRUCTOR_OPTIONS, + $services->getMainConfig() + ), + $services->get( 'CrawlerProtection.ResponseFactory' ) + ); + }, +]; diff --git a/tests/phpunit/namespaced-stubs.php b/tests/phpunit/namespaced-stubs.php index 1dfdf57..eff675a 100644 --- a/tests/phpunit/namespaced-stubs.php +++ b/tests/phpunit/namespaced-stubs.php @@ -16,6 +16,59 @@ public function onSpecialPageBeforeExecute( $special, $subPage ); } } +// ServiceOptions stub +namespace MediaWiki\Config { + class ServiceOptions { + /** @var array */ + private array $options = []; + + /** + * @param string[] $keys + * @param mixed ...$sources + */ + public function __construct( array $keys, ...$sources ) { + foreach ( $sources as $source ) { + if ( $source instanceof Config ) { + foreach ( $keys as $key ) { + if ( !array_key_exists( $key, $this->options ) ) { + $this->options[$key] = $source->get( $key ); + } + } + } elseif ( is_array( $source ) ) { + foreach ( $keys as $key ) { + if ( !array_key_exists( $key, $this->options ) && array_key_exists( $key, $source ) ) { + $this->options[$key] = $source[$key]; + } + } + } + } + } + + /** + * @param string[] $expectedKeys + */ + public function assertRequiredOptions( array $expectedKeys ): void { + // no-op in tests + } + + /** + * @param string $key + * @return mixed + */ + public function get( string $key ) { + return $this->options[$key] ?? null; + } + } + + interface Config { + /** + * @param string $name + * @return mixed + */ + public function get( $name ); + } +} + // Core classes in their proper namespaces namespace MediaWiki\Output { class OutputPage { @@ -76,24 +129,11 @@ class ActionEntryPoint { } } -namespace MediaWiki\Config { - interface Config { - /** - * @param string $name - * @return mixed - */ - public function get( $name ); - } -} - namespace MediaWiki { class MediaWikiServices { /** @var MediaWikiServices|null */ private static $instance = null; - /** @var bool Control CrawlerProtectionUse418 config for testing */ - public static $testUse418 = false; - /** * @return MediaWikiServices */ @@ -130,19 +170,6 @@ public function getMainConfig() { * @return mixed */ public function get( $name ) { - if ( $name === 'CrawlerProtectedSpecialPages' ) { - return [ - 'RecentChangesLinked', - 'WhatLinksHere', - 'MobileDiff', - 'recentchangeslinked', - 'whatlinkshere', - 'mobilediff' - ]; - } - if ( $name === 'CrawlerProtectionUse418' ) { - return MediaWikiServices::$testUse418; - } return null; } }; diff --git a/tests/phpunit/unit/CrawlerProtectionServiceTest.php b/tests/phpunit/unit/CrawlerProtectionServiceTest.php new file mode 100644 index 0000000..d2d198f --- /dev/null +++ b/tests/phpunit/unit/CrawlerProtectionServiceTest.php @@ -0,0 +1,266 @@ + $protectedPages ] + ); + + $responseFactory ??= $this->createMock( ResponseFactory::class ); + + return new CrawlerProtectionService( $options, $responseFactory ); + } + + // --------------------------------------------------------------- + // checkPerformAction tests + // --------------------------------------------------------------- + + /** + * @covers ::checkPerformAction + */ + public function testCheckPerformActionAllowsRegisteredUser() { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( true ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( [ + [ 'type', null, 'revision' ], + ] ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->never() )->method( 'denyAccess' ); + + $service = $this->buildService( [], $responseFactory ); + $this->assertTrue( $service->checkPerformAction( $output, $user, $request ) ); + } + + /** + * @covers ::checkPerformAction + * @dataProvider provideBlockedRequestParams + * + * @param array $getValMap + * @param string $msg + */ + public function testCheckPerformActionBlocksAnonymous( array $getValMap, string $msg ) { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( $getValMap ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->once() )->method( 'denyAccess' )->with( $output ); + + $service = $this->buildService( [], $responseFactory ); + $this->assertFalse( $service->checkPerformAction( $output, $user, $request ), $msg ); + } + + /** + * Data provider for request parameters that should trigger a block. + * + * @return array + */ + public function provideBlockedRequestParams(): array { + return [ + 'type=revision' => [ + [ [ 'type', null, 'revision' ], [ 'action', null, null ], [ 'diff', null, null ], [ 'oldid', null, null ] ], + 'type=revision should be blocked', + ], + 'action=history' => [ + [ [ 'type', null, null ], [ 'action', null, 'history' ], [ 'diff', null, null ], [ 'oldid', null, null ] ], + 'action=history should be blocked', + ], + 'diff=42' => [ + [ [ 'type', null, null ], [ 'action', null, null ], [ 'diff', null, '42' ], [ 'oldid', null, null ] ], + 'diff=42 should be blocked', + ], + 'oldid=99' => [ + [ [ 'type', null, null ], [ 'action', null, null ], [ 'diff', null, null ], [ 'oldid', null, '99' ] ], + 'oldid=99 should be blocked', + ], + ]; + } + + /** + * @covers ::checkPerformAction + */ + public function testCheckPerformActionAllowsNormalAnonymousView() { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( [ + [ 'type', null, null ], + [ 'action', null, 'view' ], + [ 'diff', null, null ], + [ 'oldid', null, null ], + ] ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->never() )->method( 'denyAccess' ); + + $service = $this->buildService( [], $responseFactory ); + $this->assertTrue( $service->checkPerformAction( $output, $user, $request ) ); + } + + // --------------------------------------------------------------- + // checkSpecialPage tests + // --------------------------------------------------------------- + + /** + * @covers ::checkSpecialPage + * @dataProvider provideBlockedSpecialPages + * + * @param string $specialPageName + */ + public function testCheckSpecialPageBlocksAnonymous( string $specialPageName ) { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->once() )->method( 'denyAccess' )->with( $output ); + + $service = $this->buildService( + [ 'RecentChangesLinked', 'WhatLinksHere', 'MobileDiff' ], + $responseFactory + ); + $this->assertFalse( $service->checkSpecialPage( $specialPageName, $output, $user ) ); + } + + /** + * @covers ::checkSpecialPage + * @dataProvider provideBlockedSpecialPages + * + * @param string $specialPageName + */ + public function testCheckSpecialPageAllowsRegistered( string $specialPageName ) { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( true ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->never() )->method( 'denyAccess' ); + + $service = $this->buildService( + [ 'RecentChangesLinked', 'WhatLinksHere', 'MobileDiff' ], + $responseFactory + ); + $this->assertTrue( $service->checkSpecialPage( $specialPageName, $output, $user ) ); + } + + /** + * @covers ::checkSpecialPage + */ + public function testCheckSpecialPageAllowsUnprotected() { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->never() )->method( 'denyAccess' ); + + $service = $this->buildService( + [ 'RecentChangesLinked', 'WhatLinksHere', 'MobileDiff' ], + $responseFactory + ); + $this->assertTrue( $service->checkSpecialPage( 'Search', $output, $user ) ); + } + + // --------------------------------------------------------------- + // isProtectedSpecialPage tests + // --------------------------------------------------------------- + + /** + * @covers ::isProtectedSpecialPage + */ + public function testIsProtectedSpecialPageStripsPrefix() { + $service = $this->buildService( [ 'Special:WhatLinksHere' ] ); + $this->assertTrue( $service->isProtectedSpecialPage( 'WhatLinksHere' ) ); + } + + /** + * @covers ::isProtectedSpecialPage + */ + public function testIsProtectedSpecialPageIsCaseInsensitive() { + $service = $this->buildService( [ 'WhatLinksHere' ] ); + $this->assertTrue( $service->isProtectedSpecialPage( 'whatlinkshere' ) ); + $this->assertTrue( $service->isProtectedSpecialPage( 'WHATLINKSHERE' ) ); + $this->assertTrue( $service->isProtectedSpecialPage( 'WhAtLiNkShErE' ) ); + } + + /** + * @covers ::isProtectedSpecialPage + */ + public function testIsProtectedSpecialPageReturnsFalseForUnprotected() { + $service = $this->buildService( [ 'WhatLinksHere' ] ); + $this->assertFalse( $service->isProtectedSpecialPage( 'Search' ) ); + } + + /** + * Data provider for blocked special pages. + * + * @return array + */ + public function provideBlockedSpecialPages(): array { + return [ + 'RecentChangesLinked' => [ 'RecentChangesLinked' ], + 'WhatLinksHere' => [ 'WhatLinksHere' ], + 'MobileDiff' => [ 'MobileDiff' ], + 'RecentChangesLinked lowercase' => [ 'recentchangeslinked' ], + 'WhatLinksHere lowercase' => [ 'whatlinkshere' ], + 'MobileDiff lowercase' => [ 'mobilediff' ], + 'MobileDiff mixed case' => [ 'MoBiLeDiFf' ], + ]; + } +} diff --git a/tests/phpunit/unit/HooksTest.php b/tests/phpunit/unit/HooksTest.php index 7345cb4..aead599 100644 --- a/tests/phpunit/unit/HooksTest.php +++ b/tests/phpunit/unit/HooksTest.php @@ -2,6 +2,7 @@ namespace MediaWiki\Extension\CrawlerProtection\Tests; +use MediaWiki\Extension\CrawlerProtection\CrawlerProtectionService; use MediaWiki\Extension\CrawlerProtection\Hooks; use PHPUnit\Framework\TestCase; @@ -62,185 +63,86 @@ public static function setUpBeforeClass(): void { : '\WebRequest'; } - /** - * Reset test state after each test to prevent test pollution - * - * @return void - */ - protected function tearDown(): void { - parent::tearDown(); - // Reset the test config flag (only exists in stub environment) - if ( property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { - \MediaWiki\MediaWikiServices::$testUse418 = false; - } - // Only reset if the method exists (in our test stubs) - if ( method_exists( '\MediaWiki\MediaWikiServices', 'resetForTesting' ) ) { - \MediaWiki\MediaWikiServices::resetForTesting(); - } - } - /** * @covers ::onMediaWikiPerformAction */ - public function testRevisionTypeBlocksAnonymous() { + public function testOnMediaWikiPerformActionDelegatesToService() { $output = $this->createMock( self::$outputPageClassName ); - - $request = $this->createMock( self::$webRequestClassName ); - $request->method( 'getVal' )->willReturnMap( [ - [ 'type', null, 'revision' ], - ] ); - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( false ); - + $request = $this->createMock( self::$webRequestClassName ); $article = $this->createMock( self::$articleClassName ); $title = $this->createMock( self::$titleClassName ); $wiki = $this->createMock( self::$actionEntryPointClassName ); - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) - ->getMock(); - $runner->expects( $this->once() )->method( 'denyAccess' ); + $service = $this->createMock( CrawlerProtectionService::class ); + $service->expects( $this->once() ) + ->method( 'checkPerformAction' ) + ->with( $output, $user, $request ) + ->willReturn( false ); + + $hooks = new Hooks( $service ); + $result = $hooks->onMediaWikiPerformAction( + $output, $article, $title, $user, $request, $wiki + ); - $result = $runner->onMediaWikiPerformAction( $output, $article, $title, $user, $request, $wiki ); $this->assertFalse( $result ); } /** * @covers ::onMediaWikiPerformAction */ - public function testRevisionTypeAllowsLoggedIn() { + public function testOnMediaWikiPerformActionPassesThroughTrue() { $output = $this->createMock( self::$outputPageClassName ); - - $request = $this->createMock( self::$webRequestClassName ); - $request->method( 'getVal' )->willReturnMap( [ - [ 'type', null, 'revision' ], - ] ); - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( true ); - - $article = $this->createMock( self::$articleClassName ); - $title = $this->createMock( self::$titleClassName ); - $wiki = $this->createMock( self::$actionEntryPointClassName ); - - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) - ->getMock(); - $runner->expects( $this->never() )->method( 'denyAccess' ); - - $result = $runner->onMediaWikiPerformAction( $output, $article, $title, $user, $request, $wiki ); - $this->assertTrue( $result ); - } - - /** - * @covers ::onMediaWikiPerformAction - */ - public function testNonRevisionTypeAlwaysAllowed() { - $output = $this->createMock( self::$outputPageClassName ); - $request = $this->createMock( self::$webRequestClassName ); - $request->method( 'getVal' )->willReturnMap( [ - [ 'type', null, 'view' ], - ] ); - - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( false ); - $article = $this->createMock( self::$articleClassName ); $title = $this->createMock( self::$titleClassName ); $wiki = $this->createMock( self::$actionEntryPointClassName ); - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) - ->getMock(); - $runner->expects( $this->never() )->method( 'denyAccess' ); + $service = $this->createMock( CrawlerProtectionService::class ); + $service->expects( $this->once() ) + ->method( 'checkPerformAction' ) + ->willReturn( true ); + + $hooks = new Hooks( $service ); + $result = $hooks->onMediaWikiPerformAction( + $output, $article, $title, $user, $request, $wiki + ); - $result = $runner->onMediaWikiPerformAction( $output, $article, $title, $user, $request, $wiki ); $this->assertTrue( $result ); } /** * @covers ::onSpecialPageBeforeExecute - * @dataProvider provideBlockedSpecialPages - * @param string $specialPageName */ - public function testSpecialPageBlocksAnonymous( $specialPageName ) { - // Skip this test in MediaWiki environment - it requires service container - if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { - $this->markTestSkipped( - 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' - ); - } - + public function testOnSpecialPageBeforeExecuteDelegatesToService() { $output = $this->createMock( self::$outputPageClassName ); - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( false ); $context = $this->createMockContext( $user, $output ); $special = $this->createMock( self::$specialPageClassName ); - $special->method( 'getName' )->willReturn( $specialPageName ); + $special->method( 'getName' )->willReturn( 'WhatLinksHere' ); $special->method( 'getContext' )->willReturn( $context ); - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess', 'denyAccessWith418' ] ) - ->getMock(); - $runner->expects( $this->once() )->method( 'denyAccess' )->with( $output ); - - $result = $runner->onSpecialPageBeforeExecute( $special, null ); - $this->assertFalse( $result ); - } - - /** - * @covers ::onSpecialPageBeforeExecute - * @dataProvider provideBlockedSpecialPages - * @param string $specialPageName - */ - public function testSpecialPageAllowsLoggedIn( $specialPageName ) { - // Skip this test in MediaWiki environment - it requires service container - if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { - $this->markTestSkipped( - 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' - ); - } - - $output = $this->createMock( self::$outputPageClassName ); - - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( true ); - - $context = $this->createMockContext( $user, $output ); - - $special = $this->createMock( self::$specialPageClassName ); - $special->method( 'getName' )->willReturn( $specialPageName ); - $special->method( 'getContext' )->willReturn( $context ); + $service = $this->createMock( CrawlerProtectionService::class ); + $service->expects( $this->once() ) + ->method( 'checkSpecialPage' ) + ->with( 'WhatLinksHere', $output, $user ) + ->willReturn( false ); - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) - ->getMock(); - $runner->expects( $this->never() )->method( 'denyAccess' ); + $hooks = new Hooks( $service ); + $result = $hooks->onSpecialPageBeforeExecute( $special, null ); - $result = $runner->onSpecialPageBeforeExecute( $special, null ); - $this->assertTrue( $result ); + $this->assertFalse( $result ); } /** * @covers ::onSpecialPageBeforeExecute */ - public function testUnblockedSpecialPageAllowsAnonymous() { - // Skip this test in MediaWiki environment - it requires service container - if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { - $this->markTestSkipped( - 'Test requires stub MediaWikiServices. Skipped in MediaWiki unit test environment.' - ); - } - + public function testOnSpecialPageBeforeExecutePassesThroughTrue() { $output = $this->createMock( self::$outputPageClassName ); - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( false ); $context = $this->createMockContext( $user, $output ); @@ -248,12 +150,14 @@ public function testUnblockedSpecialPageAllowsAnonymous() { $special->method( 'getName' )->willReturn( 'Search' ); $special->method( 'getContext' )->willReturn( $context ); - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccess' ] ) - ->getMock(); - $runner->expects( $this->never() )->method( 'denyAccess' ); + $service = $this->createMock( CrawlerProtectionService::class ); + $service->expects( $this->once() ) + ->method( 'checkSpecialPage' ) + ->willReturn( true ); + + $hooks = new Hooks( $service ); + $result = $hooks->onSpecialPageBeforeExecute( $special, null ); - $result = $runner->onSpecialPageBeforeExecute( $special, null ); $this->assertTrue( $result ); } @@ -265,7 +169,7 @@ public function testUnblockedSpecialPageAllowsAnonymous() { * @return \stdClass Mock context */ private function createMockContext( $user, $output ) { - $context = new class( $user, $output ) { + return new class( $user, $output ) { /** @var \PHPUnit\Framework\MockObject\MockObject */ private $user; /** @var \PHPUnit\Framework\MockObject\MockObject */ @@ -294,61 +198,5 @@ public function getOutput() { return $this->output; } }; - return $context; - } - - /** - * @covers ::onSpecialPageBeforeExecute - * @covers ::denyAccessWith418 - */ - public function testSpecialPageCallsDenyAccessWith418WhenConfigured() { - // This test only works with our test stubs, not in MediaWiki's PHPUnit environment - if ( !property_exists( '\MediaWiki\MediaWikiServices', 'testUse418' ) ) { - $this->markTestSkipped( - 'Test requires stub MediaWikiServices. Skipped in MediaWiki integration tests.' - ); - } - - // Enable 418 response in the test stub config - \MediaWiki\MediaWikiServices::$testUse418 = true; - - $output = $this->createMock( self::$outputPageClassName ); - - $user = $this->createMock( self::$userClassName ); - $user->method( 'isRegistered' )->willReturn( false ); - - $context = $this->createMockContext( $user, $output ); - - $special = $this->createMock( self::$specialPageClassName ); - $special->method( 'getName' )->willReturn( 'WhatLinksHere' ); - $special->method( 'getContext' )->willReturn( $context ); - - $runner = $this->getMockBuilder( Hooks::class ) - ->onlyMethods( [ 'denyAccessWith418' ] ) - ->getMock(); - // When denyFast is true, only denyAccessWith418 is called (it dies before denyAccess) - $runner->expects( $this->once() )->method( 'denyAccessWith418' ); - - $result = $runner->onSpecialPageBeforeExecute( $special, null ); - $this->assertFalse( $result ); - } - - /** - * Data provider for blocked special pages. - * - * @return array - */ - public function provideBlockedSpecialPages() { - return [ - 'RecentChangesLinked' => [ 'RecentChangesLinked' ], - 'WhatLinksHere' => [ 'WhatLinksHere' ], - 'MobileDiff' => [ 'MobileDiff' ], - // Test case sensitivity - 'RecentChangesLinked lowercase' => [ 'recentchangeslinked' ], - 'WhatLinksHere lowercase' => [ 'whatlinkshere' ], - 'MobileDiff lowercase' => [ 'mobilediff' ], - // Test mixed case - 'MobileDiff mixed case' => [ 'MoBiLeDiFf' ], - ]; } } diff --git a/tests/phpunit/unit/ResponseFactoryTest.php b/tests/phpunit/unit/ResponseFactoryTest.php new file mode 100644 index 0000000..e729c46 --- /dev/null +++ b/tests/phpunit/unit/ResponseFactoryTest.php @@ -0,0 +1,138 @@ + false, + 'CrawlerProtectionRawDenial' => false, + 'CrawlerProtectionRawDenialHeader' => 'HTTP/1.0 403 Forbidden', + 'CrawlerProtectionRawDenialText' => '403 Forbidden', + ]; + + $config = array_merge( $defaults, $overrides ); + + return new ResponseFactory( + new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, $config ) + ); + } + + /** + * @covers ::denyAccess + * @covers ::denyAccessPretty + */ + public function testDenyAccessPrettySetStatusCode() { + $output = $this->createMock( self::$outputPageClassName ); + $output->expects( $this->once() ) + ->method( 'setStatusCode' ) + ->with( 403 ); + $output->expects( $this->once() ) + ->method( 'addWikiTextAsInterface' ); + + $factory = $this->buildFactory(); + $factory->denyAccess( $output ); + } + + /** + * @covers ::denyAccess + */ + public function testDenyAccessChooses418WhenConfigured() { + $factory = $this->getMockBuilder( ResponseFactory::class ) + ->setConstructorArgs( [ + new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, [ + 'CrawlerProtectionUse418' => true, + 'CrawlerProtectionRawDenial' => false, + 'CrawlerProtectionRawDenialHeader' => '', + 'CrawlerProtectionRawDenialText' => '', + ] ) + ] ) + ->onlyMethods( [ 'denyAccessWith418' ] ) + ->getMock(); + + $factory->expects( $this->once() )->method( 'denyAccessWith418' ); + + $output = $this->createMock( self::$outputPageClassName ); + $factory->denyAccess( $output ); + } + + /** + * @covers ::denyAccess + */ + public function testDenyAccessChoosesRawWhenConfigured() { + $factory = $this->getMockBuilder( ResponseFactory::class ) + ->setConstructorArgs( [ + new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, [ + 'CrawlerProtectionUse418' => false, + 'CrawlerProtectionRawDenial' => true, + 'CrawlerProtectionRawDenialHeader' => 'HTTP/1.0 403 Forbidden', + 'CrawlerProtectionRawDenialText' => '403 Forbidden', + ] ) + ] ) + ->onlyMethods( [ 'denyAccessRaw' ] ) + ->getMock(); + + $factory->expects( $this->once() ) + ->method( 'denyAccessRaw' ) + ->with( 'HTTP/1.0 403 Forbidden', '403 Forbidden' ); + + $output = $this->createMock( self::$outputPageClassName ); + $factory->denyAccess( $output ); + } + + /** + * @covers ::denyAccess + */ + public function testDenyAccessFallsThroughToPretty() { + $factory = $this->getMockBuilder( ResponseFactory::class ) + ->setConstructorArgs( [ + new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, [ + 'CrawlerProtectionUse418' => false, + 'CrawlerProtectionRawDenial' => false, + 'CrawlerProtectionRawDenialHeader' => '', + 'CrawlerProtectionRawDenialText' => '', + ] ) + ] ) + ->onlyMethods( [ 'denyAccessPretty' ] ) + ->getMock(); + + $output = $this->createMock( self::$outputPageClassName ); + $factory->expects( $this->once() ) + ->method( 'denyAccessPretty' ) + ->with( $output ); + + $factory->denyAccess( $output ); + } + + /** + * @covers ::__construct + */ + public function testConstructorAcceptsValidOptions() { + $factory = $this->buildFactory(); + $this->assertInstanceOf( ResponseFactory::class, $factory ); + } +} From 570cafdfee406ce7a659cf14912d3a786669b52f Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 14:27:12 -0600 Subject: [PATCH 3/9] Fix static analysis, code style, and unit test issues --- .github/workflows/ci.yml | 6 ++-- includes/CrawlerProtectionService.php | 10 +++---- includes/ResponseFactory.php | 5 ++-- .../unit/CrawlerProtectionServiceTest.php | 28 ++++++++++++++++--- tests/phpunit/unit/ResponseFactoryTest.php | 6 ++++ 5 files changed, 41 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d1505a5..b445e34 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -53,7 +53,7 @@ jobs: path: extensions/${{ env.EXTNAME }} - name: Setup Composer run: | - echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}}}' > composer.local.json + echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update composer update - name: Lint @@ -100,7 +100,7 @@ jobs: path: extensions/${{ env.EXTNAME }} - name: Setup Composer run: | - echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}}}' > composer.local.json + echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update composer update - name: Phan @@ -143,7 +143,7 @@ jobs: path: extensions/${{ env.EXTNAME }} - name: Setup Composer run: | - echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}}}' > composer.local.json + echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update composer update - name: Install MediaWiki diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php index ee77b8b..285bcc7 100644 --- a/includes/CrawlerProtectionService.php +++ b/includes/CrawlerProtectionService.php @@ -72,9 +72,9 @@ public function __construct( * @return bool */ public function checkPerformAction( - OutputPage $output, - User $user, - WebRequest $request + $output, + $user, + $request ): bool { if ( $user->isRegistered() ) { return true; @@ -111,8 +111,8 @@ public function checkPerformAction( */ public function checkSpecialPage( string $specialPageName, - OutputPage $output, - User $user + $output, + $user ): bool { if ( $user->isRegistered() ) { return true; diff --git a/includes/ResponseFactory.php b/includes/ResponseFactory.php index b611ad3..699ae0e 100644 --- a/includes/ResponseFactory.php +++ b/includes/ResponseFactory.php @@ -58,7 +58,7 @@ public function __construct( ServiceOptions $options ) { * @param OutputPage $output Used only for the "pretty" strategy * @return void */ - public function denyAccess( OutputPage $output ): void { + public function denyAccess( $output ): void { if ( $this->options->get( 'CrawlerProtectionUse418' ) ) { $this->denyAccessWith418(); } elseif ( $this->options->get( 'CrawlerProtectionRawDenial' ) ) { @@ -87,6 +87,7 @@ protected function denyAccessWith418(): void { * @param string $header * @param string $message * @return void + * @suppress PhanPluginNeverReturnMethod */ protected function denyAccessRaw( string $header, string $message ): void { header( $header ); @@ -99,7 +100,7 @@ protected function denyAccessRaw( string $header, string $message ): void { * @param OutputPage $output * @return void */ - protected function denyAccessPretty( OutputPage $output ): void { + protected function denyAccessPretty( $output ): void { $output->setStatusCode( 403 ); $output->addWikiTextAsInterface( wfMessage( 'crawlerprotection-accessdenied-text' )->plain() diff --git a/tests/phpunit/unit/CrawlerProtectionServiceTest.php b/tests/phpunit/unit/CrawlerProtectionServiceTest.php index d2d198f..907fff8 100644 --- a/tests/phpunit/unit/CrawlerProtectionServiceTest.php +++ b/tests/phpunit/unit/CrawlerProtectionServiceTest.php @@ -112,19 +112,39 @@ public function testCheckPerformActionBlocksAnonymous( array $getValMap, string public function provideBlockedRequestParams(): array { return [ 'type=revision' => [ - [ [ 'type', null, 'revision' ], [ 'action', null, null ], [ 'diff', null, null ], [ 'oldid', null, null ] ], + [ + [ 'type', null, 'revision' ], + [ 'action', null, null ], + [ 'diff', null, null ], + [ 'oldid', null, null ], + ], 'type=revision should be blocked', ], 'action=history' => [ - [ [ 'type', null, null ], [ 'action', null, 'history' ], [ 'diff', null, null ], [ 'oldid', null, null ] ], + [ + [ 'type', null, null ], + [ 'action', null, 'history' ], + [ 'diff', null, null ], + [ 'oldid', null, null ], + ], 'action=history should be blocked', ], 'diff=42' => [ - [ [ 'type', null, null ], [ 'action', null, null ], [ 'diff', null, '42' ], [ 'oldid', null, null ] ], + [ + [ 'type', null, null ], + [ 'action', null, null ], + [ 'diff', null, '42' ], + [ 'oldid', null, null ], + ], 'diff=42 should be blocked', ], 'oldid=99' => [ - [ [ 'type', null, null ], [ 'action', null, null ], [ 'diff', null, null ], [ 'oldid', null, '99' ] ], + [ + [ 'type', null, null ], + [ 'action', null, null ], + [ 'diff', null, null ], + [ 'oldid', null, '99' ], + ], 'oldid=99 should be blocked', ], ]; diff --git a/tests/phpunit/unit/ResponseFactoryTest.php b/tests/phpunit/unit/ResponseFactoryTest.php index e729c46..032e0d0 100644 --- a/tests/phpunit/unit/ResponseFactoryTest.php +++ b/tests/phpunit/unit/ResponseFactoryTest.php @@ -47,6 +47,12 @@ private function buildFactory( array $overrides = [] ): ResponseFactory { * @covers ::denyAccessPretty */ public function testDenyAccessPrettySetStatusCode() { + if ( defined( 'MEDIAWIKI' ) ) { + $this->markTestSkipped( + 'Skipped in MediaWiki integration environment: wfMessage() requires service container' + ); + } + $output = $this->createMock( self::$outputPageClassName ); $output->expects( $this->once() ) ->method( 'setStatusCode' ) From cdb47a004a5f620d3647e8662529f6902e473fce Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 14:42:26 -0600 Subject: [PATCH 4/9] Fix issues --- .github/workflows/ci.yml | 9 --------- extension.json | 2 +- includes/Hooks.php | 3 --- includes/ResponseFactory.php | 5 ++++- 4 files changed, 5 insertions(+), 14 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b445e34..2ae0846 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -25,9 +25,6 @@ jobs: php: [ '8.2', '8.3', '8.4' ] mediawiki: [ REL1_43 ] include: - - os: ubuntu-latest - php: '7.4' - mediawiki: REL1_35 - os: ubuntu-latest php: '7.4' mediawiki: REL1_39 @@ -72,9 +69,6 @@ jobs: php: [ '8.2', '8.3' ] mediawiki: [ REL1_43 ] include: - - os: ubuntu-latest - php: '7.4' - mediawiki: REL1_35 - os: ubuntu-latest php: '7.4' mediawiki: REL1_39 @@ -115,9 +109,6 @@ jobs: php: [ '8.2', '8.3', '8.4' ] mediawiki: [ REL1_43 ] include: - - os: ubuntu-latest - php: '7.4' - mediawiki: REL1_35 - os: ubuntu-latest php: '7.4' mediawiki: REL1_39 diff --git a/extension.json b/extension.json index c591431..239e921 100644 --- a/extension.json +++ b/extension.json @@ -5,7 +5,7 @@ "description": "Suite of protective measures to protect wikis from crawlers.", "type": "hook", "requires": { - "MediaWiki": ">= 1.35" + "MediaWiki": ">= 1.39.4" }, "AutoloadNamespaces": { "MediaWiki\\Extension\\CrawlerProtection\\": "includes/" diff --git a/includes/Hooks.php b/includes/Hooks.php index bf11661..518c046 100644 --- a/includes/Hooks.php +++ b/includes/Hooks.php @@ -23,9 +23,6 @@ // Class aliases for multi-version compatibility. // These need to be in global scope so phan can pick up on them, // and before any use statements that make use of the namespaced names. -if ( version_compare( MW_VERSION, '1.39.4', '<' ) ) { - class_alias( '\Title', '\MediaWiki\Title\Title' ); -} if ( version_compare( MW_VERSION, '1.41', '<' ) ) { class_alias( '\OutputPage', '\MediaWiki\Output\OutputPage' ); diff --git a/includes/ResponseFactory.php b/includes/ResponseFactory.php index 699ae0e..c4663b4 100644 --- a/includes/ResponseFactory.php +++ b/includes/ResponseFactory.php @@ -33,6 +33,9 @@ */ class ResponseFactory { + private const TEAPOT_HEADER = 'HTTP/1.0 418 I\'m a teapot'; + private const TEAPOT_BODY = 'I\'m a teapot'; + /** @var string[] List of constructor options this class accepts */ public const CONSTRUCTOR_OPTIONS = [ 'CrawlerProtectionUse418', @@ -78,7 +81,7 @@ public function denyAccess( $output ): void { * @suppress PhanPluginNeverReturnMethod */ protected function denyAccessWith418(): void { - $this->denyAccessRaw( 'HTTP/1.0 I\'m a teapot', 'I\'m a teapot' ); + $this->denyAccessRaw( self::TEAPOT_HEADER, self::TEAPOT_BODY ); } /** From 08025ab4e2bc4ac2416b207f1c6693d7d64a3762 Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 14:46:58 -0600 Subject: [PATCH 5/9] MIT License --- includes/CrawlerProtectionService.php | 31 ++++++++++++++++----------- includes/Hooks.php | 31 ++++++++++++++++----------- includes/ResponseFactory.php | 31 ++++++++++++++++----------- includes/ServiceWiring.php | 31 ++++++++++++++++----------- 4 files changed, 72 insertions(+), 52 deletions(-) diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php index 285bcc7..3fbee8e 100644 --- a/includes/CrawlerProtectionService.php +++ b/includes/CrawlerProtectionService.php @@ -1,21 +1,26 @@ Date: Fri, 27 Mar 2026 14:48:46 -0600 Subject: [PATCH 6/9] Add comments --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2ae0846..274526a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,6 +49,7 @@ jobs: with: path: extensions/${{ env.EXTNAME }} - name: Setup Composer + # Don't block insecure packages because MediaWiki might be using outdated dependencies, which shouldn't block the extension's pipelines from running run: | echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update @@ -93,6 +94,7 @@ jobs: with: path: extensions/${{ env.EXTNAME }} - name: Setup Composer + # Don't block insecure packages because MediaWiki might be using outdated dependencies, which shouldn't block the extension's pipelines from running run: | echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update @@ -133,6 +135,7 @@ jobs: with: path: extensions/${{ env.EXTNAME }} - name: Setup Composer + # Don't block insecure packages because MediaWiki might be using outdated dependencies, which shouldn't block the extension's pipelines from running run: | echo '{"extra":{"merge-plugin":{"include":["extensions/*/composer.json","skins/*/composer.json"]}},"config":{"audit":{"abandoned":"report","block-insecure":false}}}' > composer.local.json composer update From e0f3fac4e670c03f46ea5e6986240c1347511d9f Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 15:04:16 -0600 Subject: [PATCH 7/9] Clarify logic with 418 enabled and raw disabled --- includes/ResponseFactory.php | 24 +++++++++++----- tests/phpunit/unit/ResponseFactoryTest.php | 33 ++++++++++++++++++++-- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git a/includes/ResponseFactory.php b/includes/ResponseFactory.php index a39a746..501c5d3 100644 --- a/includes/ResponseFactory.php +++ b/includes/ResponseFactory.php @@ -63,17 +63,27 @@ public function __construct( ServiceOptions $options ) { /** * Deny access using the configured strategy. * + * When CrawlerProtectionRawDenial is enabled, a raw HTTP response is + * sent. If CrawlerProtectionUse418 is *also* enabled the response + * uses the 418 "I'm a teapot" status; otherwise the configured + * header / body are used. + * + * When CrawlerProtectionRawDenial is disabled, a pretty 403 page is + * rendered through OutputPage regardless of the Use418 setting. + * * @param OutputPage $output Used only for the "pretty" strategy * @return void */ public function denyAccess( $output ): void { - if ( $this->options->get( 'CrawlerProtectionUse418' ) ) { - $this->denyAccessWith418(); - } elseif ( $this->options->get( 'CrawlerProtectionRawDenial' ) ) { - $this->denyAccessRaw( - $this->options->get( 'CrawlerProtectionRawDenialHeader' ), - $this->options->get( 'CrawlerProtectionRawDenialText' ) - ); + if ( $this->options->get( 'CrawlerProtectionRawDenial' ) ) { + if ( $this->options->get( 'CrawlerProtectionUse418' ) ) { + $this->denyAccessWith418(); + } else { + $this->denyAccessRaw( + $this->options->get( 'CrawlerProtectionRawDenialHeader' ), + $this->options->get( 'CrawlerProtectionRawDenialText' ) + ); + } } else { $this->denyAccessPretty( $output ); } diff --git a/tests/phpunit/unit/ResponseFactoryTest.php b/tests/phpunit/unit/ResponseFactoryTest.php index 032e0d0..1068fb0 100644 --- a/tests/phpunit/unit/ResponseFactoryTest.php +++ b/tests/phpunit/unit/ResponseFactoryTest.php @@ -67,12 +67,12 @@ public function testDenyAccessPrettySetStatusCode() { /** * @covers ::denyAccess */ - public function testDenyAccessChooses418WhenConfigured() { + public function testDenyAccessChooses418WhenBothRawAndUse418() { $factory = $this->getMockBuilder( ResponseFactory::class ) ->setConstructorArgs( [ new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, [ 'CrawlerProtectionUse418' => true, - 'CrawlerProtectionRawDenial' => false, + 'CrawlerProtectionRawDenial' => true, 'CrawlerProtectionRawDenialHeader' => '', 'CrawlerProtectionRawDenialText' => '', ] ) @@ -86,6 +86,35 @@ public function testDenyAccessChooses418WhenConfigured() { $factory->denyAccess( $output ); } + /** + * When RawDenial is false, Use418 should be a no-op and the factory + * must fall through to the pretty 403 page. + * + * @covers ::denyAccess + */ + public function testDenyAccessIgnoresUse418WhenRawDenialDisabled() { + $factory = $this->getMockBuilder( ResponseFactory::class ) + ->setConstructorArgs( [ + new ServiceOptions( ResponseFactory::CONSTRUCTOR_OPTIONS, [ + 'CrawlerProtectionUse418' => true, + 'CrawlerProtectionRawDenial' => false, + 'CrawlerProtectionRawDenialHeader' => '', + 'CrawlerProtectionRawDenialText' => '', + ] ) + ] ) + ->onlyMethods( [ 'denyAccessPretty', 'denyAccessWith418' ] ) + ->getMock(); + + $factory->expects( $this->never() )->method( 'denyAccessWith418' ); + + $output = $this->createMock( self::$outputPageClassName ); + $factory->expects( $this->once() ) + ->method( 'denyAccessPretty' ) + ->with( $output ); + + $factory->denyAccess( $output ); + } + /** * @covers ::denyAccess */ From d2d3c2c68489eb274cc2fb149a4003c162ec71cf Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 15:34:10 -0600 Subject: [PATCH 8/9] Broadening special page check to be language-agnostic --- includes/CrawlerProtectionService.php | 18 ++++++++++++------ .../unit/CrawlerProtectionServiceTest.php | 16 ++++++++++++++++ 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php index 3fbee8e..878c4b9 100644 --- a/includes/CrawlerProtectionService.php +++ b/includes/CrawlerProtectionService.php @@ -38,9 +38,6 @@ */ class CrawlerProtectionService { - /** @var string Prefix for special page names */ - private const SPECIAL_PAGE_PREFIX = 'Special:'; - /** @var string[] List of constructor options this class accepts */ public const CONSTRUCTOR_OPTIONS = [ 'CrawlerProtectedSpecialPages', @@ -135,18 +132,27 @@ public function checkSpecialPage( * Determine whether the given special page name is in the * configured list of protected special pages. * + * Because this method is only called from the SpecialPageBeforeExecute + * hook, any "Foo:" prefix on a configured value is necessarily the + * "Special" namespace in English or its localized equivalent (e.g. + * "Spezial:" in German). We therefore simply strip everything up to + * and including the first colon rather than checking for a specific + * namespace name, which keeps the logic language-agnostic. + * * @param string $specialPageName * @return bool */ public function isProtectedSpecialPage( string $specialPageName ): bool { $protectedSpecialPages = $this->options->get( 'CrawlerProtectedSpecialPages' ); - // Normalize protected special pages: lowercase and strip 'Special:' prefix + // Normalize protected special pages: lowercase and strip any + // namespace prefix (everything up to and including the first ':'). $normalizedProtectedPages = array_map( static function ( string $p ): string { $lower = strtolower( $p ); - if ( strpos( $lower, strtolower( self::SPECIAL_PAGE_PREFIX ) ) === 0 ) { - return substr( $lower, strlen( self::SPECIAL_PAGE_PREFIX ) ); + $colonPos = strpos( $lower, ':' ); + if ( $colonPos !== false ) { + return substr( $lower, $colonPos + 1 ); } return $lower; }, diff --git a/tests/phpunit/unit/CrawlerProtectionServiceTest.php b/tests/phpunit/unit/CrawlerProtectionServiceTest.php index 907fff8..1e92f5e 100644 --- a/tests/phpunit/unit/CrawlerProtectionServiceTest.php +++ b/tests/phpunit/unit/CrawlerProtectionServiceTest.php @@ -267,6 +267,22 @@ public function testIsProtectedSpecialPageReturnsFalseForUnprotected() { $this->assertFalse( $service->isProtectedSpecialPage( 'Search' ) ); } + /** + * @covers ::isProtectedSpecialPage + */ + public function testIsProtectedSpecialPageStripsLocalizedPrefix() { + $service = $this->buildService( [ 'Spezial:WhatLinksHere' ] ); + $this->assertTrue( $service->isProtectedSpecialPage( 'WhatLinksHere' ) ); + } + + /** + * @covers ::isProtectedSpecialPage + */ + public function testIsProtectedSpecialPageStripsAnyPrefix() { + $service = $this->buildService( [ 'Especial:WhatLinksHere' ] ); + $this->assertTrue( $service->isProtectedSpecialPage( 'WhatLinksHere' ) ); + } + /** * Data provider for blocked special pages. * From 38da555fcc0c019db72b6134cff166db3a280d5f Mon Sep 17 00:00:00 2001 From: Jeffrey Wang Date: Fri, 27 Mar 2026 15:57:55 -0600 Subject: [PATCH 9/9] Small changes suggested by Yaron --- .../mediawiki-extensions-clean-code.instructions.md | 6 +++++- extension.json | 2 +- includes/CrawlerProtectionService.php | 2 +- includes/Hooks.php | 2 +- includes/ResponseFactory.php | 2 +- includes/ServiceWiring.php | 2 +- 6 files changed, 10 insertions(+), 6 deletions(-) diff --git a/.github/instructions/mediawiki-extensions-clean-code.instructions.md b/.github/instructions/mediawiki-extensions-clean-code.instructions.md index e528c11..507c892 100644 --- a/.github/instructions/mediawiki-extensions-clean-code.instructions.md +++ b/.github/instructions/mediawiki-extensions-clean-code.instructions.md @@ -8,10 +8,14 @@ applyTo: "*" Use the service container pattern (based on MediaWikiServices) to define how key objects are constructed and wired together. +Absolutely no MediaWikiServices references should be in the code because everything should be injected. + ## Dependency injection Use this example from PluggableAuth to implement dependency injection. +Objects should only be instantiated in `ServiceWiring.php`, not in any other file. + Below is an `includes/ServiceWiring.php` file. ```php @@ -150,4 +154,4 @@ class PluggableAuthServiceTest extends MediaWikiIntegrationTestCase { } } -``` \ No newline at end of file +``` diff --git a/extension.json b/extension.json index 239e921..849dda5 100644 --- a/extension.json +++ b/extension.json @@ -40,7 +40,7 @@ "value": "HTTP/1.0 403 Forbidden" }, "CrawlerProtectionRawDenialText": { - "value": "403 Forbidden" + "value": "403 Forbidden. You must be logged in to view this page." } }, "ServiceWiringFiles": [ diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php index 878c4b9..43feda7 100644 --- a/includes/CrawlerProtectionService.php +++ b/includes/CrawlerProtectionService.php @@ -1,6 +1,6 @@