diff --git a/README.md b/README.md index 27dbe91..703544e 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,17 @@ intensive. `curl -s "[YOURWIKI]api.php?action=query&meta=siteinfo&siprop=specialpagealiases&format=json" | jq -r '.query.specialpagealiases[].aliases[]' | sort` Of course certain Specials MUST be allowed like Special:Login so do not block everything. +* `$wgCrawlerProtectedActions` - array of MediaWiki action names to block for + anonymous users (default: `[ 'history' ]`). Removing `'history'` from this + list disables protection of the history-listing page but does not affect + whether individual revisions and diffs are protected (see + `$wgCrawlerProtectionProtectRevisions` below). +* `$wgCrawlerProtectionProtectRevisions` - when `true` (default), anonymous + access to individual revisions and diffs (requests using `type=revision`, + `diff=`, or `oldid=` query parameters) is denied. Set to `false` to allow + anonymous access to those URLs. This setting is independent of + `$wgCrawlerProtectedActions`, so you can protect revisions/diffs without + protecting the history listing page, or vice versa. * `$wgCrawlerProtectionUse418` - drop denied requests in a quick way via `die();` with [418 I'm a teapot](https://developer.mozilla.org/en-US/docs/Web/HTTP/Reference/Status/418) diff --git a/extension.json b/extension.json index 4ce8c2c..4b41a6a 100644 --- a/extension.json +++ b/extension.json @@ -53,6 +53,9 @@ "CrawlerProtectionAllowedIPs": { "value": [], "merge_strategy": "provide_default" + }, + "CrawlerProtectionProtectRevisions": { + "value": true } }, "ServiceWiringFiles": [ diff --git a/includes/CrawlerProtectionService.php b/includes/CrawlerProtectionService.php index 0dd0360..fb00df6 100644 --- a/includes/CrawlerProtectionService.php +++ b/includes/CrawlerProtectionService.php @@ -44,6 +44,7 @@ class CrawlerProtectionService { 'CrawlerProtectedActions', 'CrawlerProtectedSpecialPages', 'CrawlerProtectionAllowedIPs', + 'CrawlerProtectionProtectRevisions', ]; /** @var ServiceOptions */ @@ -90,16 +91,16 @@ public function checkPerformAction( $diffId = (int)$request->getVal( 'diff' ); $oldId = (int)$request->getVal( 'oldid' ); - // The type=revision, diff and oldid parameters are all ways of - // viewing page history. They are only blocked when 'history' is - // in the configured list of protected actions, so that operators - // can fully disable history blocking by removing 'history' from - // $wgCrawlerProtectedActions. - $historyProtected = $this->isProtectedAction( 'history' ); + // $wgCrawlerProtectionProtectRevisions independently controls whether + // type=revision, diff and oldid requests are blocked. This allows + // operators to disable history-listing protection (by removing 'history' + // from $wgCrawlerProtectedActions) while still blocking direct access + // to individual revisions and diffs, or vice versa. + $revisionsProtected = $this->options->get( 'CrawlerProtectionProtectRevisions' ); if ( $this->isProtectedAction( $action ) - || ( $historyProtected && ( + || ( $revisionsProtected && ( $type === 'revision' || $diffId > 0 || $oldId > 0 diff --git a/tests/phpunit/unit/CrawlerProtectionServiceTest.php b/tests/phpunit/unit/CrawlerProtectionServiceTest.php index 8d99287..172470d 100644 --- a/tests/phpunit/unit/CrawlerProtectionServiceTest.php +++ b/tests/phpunit/unit/CrawlerProtectionServiceTest.php @@ -44,20 +44,23 @@ public static function setUpBeforeClass(): void { * @param array $protectedActions * @param string|array $allowedIPs * @param ResponseFactory|\PHPUnit\Framework\MockObject\MockObject|null $responseFactory + * @param bool $protectRevisions * @return CrawlerProtectionService */ private function buildService( array $protectedPages = [ 'recentchangeslinked', 'whatlinkshere', 'mobilediff' ], array $protectedActions = [ 'history' ], $allowedIPs = [], - $responseFactory = null + $responseFactory = null, + bool $protectRevisions = true ): CrawlerProtectionService { $options = new ServiceOptions( CrawlerProtectionService::CONSTRUCTOR_OPTIONS, [ 'CrawlerProtectedActions' => $protectedActions, 'CrawlerProtectedSpecialPages' => $protectedPages, - 'CrawlerProtectionAllowedIPs' => $allowedIPs + 'CrawlerProtectionAllowedIPs' => $allowedIPs, + 'CrawlerProtectionProtectRevisions' => $protectRevisions, ] ); @@ -205,10 +208,11 @@ public function testCheckPerformActionBlocksConfiguredAction() { } /** - * When 'history' is removed from CrawlerProtectedActions, the - * history-related parameters (type=revision, diff, oldid) should - * also be allowed, so that operators can fully disable history - * blocking via configuration (see issue: history can't be unblocked). + * When 'history' is removed from CrawlerProtectedActions, action=history + * should be allowed. When CrawlerProtectionProtectRevisions is false, + * the history-related parameters (type=revision, diff, oldid) should also + * be allowed, so that operators can independently control revision and + * history-listing protection. * * @covers ::checkPerformAction * @dataProvider provideHistoryRelatedRequestParams @@ -230,7 +234,7 @@ public function testCheckPerformActionAllowsHistoryRelatedWhenNotConfigured( $responseFactory = $this->createMock( ResponseFactory::class ); $responseFactory->expects( $this->never() )->method( 'denyAccess' ); - $service = $this->buildService( [], [], [], $responseFactory ); + $service = $this->buildService( [], [], [], $responseFactory, false ); $this->assertTrue( $service->checkPerformAction( $output, $user, $request ), $msg ); } @@ -304,6 +308,135 @@ public function testCheckPerformActionAllowsActionNotInConfig() { $this->assertTrue( $service->checkPerformAction( $output, $user, $request ) ); } + /** + * When CrawlerProtectionProtectRevisions is true and 'history' is NOT in + * CrawlerProtectedActions, revision/diff requests should still be blocked. + * This allows operators to protect individual revisions without protecting + * the history listing page. + * + * @covers ::checkPerformAction + * @dataProvider provideRevisionOnlyRequestParams + * + * @param array $getValMap + * @param string $msg + */ + public function testCheckPerformActionBlocksRevisionsWhenProtectRevisionsTrueHistoryUnconfigured( + array $getValMap, string $msg + ) { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + $user->method( 'getName' )->willReturn( '127.0.0.1' ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( $getValMap ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->once() )->method( 'denyAccess' )->with( $output ); + + // history not in protected actions, but protectRevisions = true + $service = $this->buildService( [], [], [], $responseFactory, true ); + $this->assertFalse( $service->checkPerformAction( $output, $user, $request ), $msg ); + } + + /** + * Data provider for revision/diff request parameters (excludes action=history + * since that is controlled independently by CrawlerProtectedActions). + * + * @return array + */ + public function provideRevisionOnlyRequestParams(): array { + return [ + 'type=revision' => [ + [ + [ 'type', null, 'revision' ], + [ 'action', null, null ], + [ 'diff', null, null ], + [ 'oldid', null, null ], + ], + 'type=revision should be blocked when CrawlerProtectionProtectRevisions is true', + ], + 'diff=42' => [ + [ + [ 'type', null, null ], + [ 'action', null, null ], + [ 'diff', null, '42' ], + [ 'oldid', null, null ], + ], + 'diff=42 should be blocked when CrawlerProtectionProtectRevisions is true', + ], + 'oldid=99' => [ + [ + [ 'type', null, null ], + [ 'action', null, null ], + [ 'diff', null, null ], + [ 'oldid', null, '99' ], + ], + 'oldid=99 should be blocked when CrawlerProtectionProtectRevisions is true', + ], + ]; + } + + /** + * When CrawlerProtectionProtectRevisions is false, revision/diff requests + * should be allowed even when 'history' is in CrawlerProtectedActions. + * + * @covers ::checkPerformAction + * @dataProvider provideRevisionOnlyRequestParams + * + * @param array $getValMap + * @param string $msg + */ + public function testCheckPerformActionAllowsRevisionsWhenProtectRevisionsFalse( + array $getValMap, string $msg + ) { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + $user->method( 'getName' )->willReturn( '127.0.0.1' ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( $getValMap ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->never() )->method( 'denyAccess' ); + + // history is protected, but protectRevisions = false + $service = $this->buildService( [], [ 'history' ], [], $responseFactory, false ); + $this->assertTrue( + $service->checkPerformAction( $output, $user, $request ), + $msg + ); + } + + /** + * action=history is controlled solely by CrawlerProtectedActions. It must + * remain blocked even when CrawlerProtectionProtectRevisions is false. + * + * @covers ::checkPerformAction + */ + public function testCheckPerformActionBlocksHistoryListingEvenWhenProtectRevisionsFalse() { + $output = $this->createMock( self::$outputPageClassName ); + $user = $this->createMock( self::$userClassName ); + $user->method( 'isRegistered' )->willReturn( false ); + $user->method( 'getName' )->willReturn( '127.0.0.1' ); + + $request = $this->createMock( self::$webRequestClassName ); + $request->method( 'getVal' )->willReturnMap( [ + [ 'type', null, null ], + [ 'action', null, 'history' ], + [ 'diff', null, null ], + [ 'oldid', null, null ], + ] ); + + $responseFactory = $this->createMock( ResponseFactory::class ); + $responseFactory->expects( $this->once() )->method( 'denyAccess' )->with( $output ); + + // history is protected, but protectRevisions = false + $service = $this->buildService( [], [ 'history' ], [], $responseFactory, false ); + $this->assertFalse( $service->checkPerformAction( $output, $user, $request ) ); + } + // --------------------------------------------------------------- // isProtectedAction tests // ---------------------------------------------------------------