From 68f2133e9ce304f340fe4886b3660d0a961a03a8 Mon Sep 17 00:00:00 2001 From: Yongmin Hong Date: Mon, 17 Jun 2024 12:40:01 +0900 Subject: [PATCH] PhabExt(robots.txt): add more rules, ... Summary: setClientIDCookie none to return Response, more rules, todo: Signed-off-by: Yongmin Hong Test Plan: deploy? Reviewers: O1 revi & automations, revi Reviewed By: O1 revi & automations, revi Differential Revision: https://issuetracker.revi.xyz/D338 --- .../PhabricatorCustomRobotsTxtController.php | 30 +++++++++++++++---- 1 file changed, 25 insertions(+), 5 deletions(-) diff --git a/PhabExt/PhabricatorCustomRobotsTxtController.php b/PhabExt/PhabricatorCustomRobotsTxtController.php index 203290d..0d439e6 100644 --- a/PhabExt/PhabricatorCustomRobotsTxtController.php +++ b/PhabExt/PhabricatorCustomRobotsTxtController.php @@ -8,9 +8,7 @@ public function shouldRequireLogin() { return false; } - public function setClientIDCookie() { - return false; - } + // TODO: Different content for cdn domains public function processRequest() { $out = array(); @@ -19,7 +17,7 @@ public function processRequest() { // Version timestamp is when I started editing them. // Edit setLastModified at the bottom as well. // Calculate EpochTime via go/epoch - $out[] = '# version: 20240523T040800+0900'; + $out[] = '# version: 20240616T191900+0900'; $out[] = '# also at https://github.com/revi/sandbox.git'; $out[] = 'User-Agent: *'; $out[] = 'Disallow: /diffusion/'; @@ -101,6 +99,27 @@ public function processRequest() { $out[] = '# Block PetalBot, misbehaving'; $out[] = 'User-agent: PetalBot'; $out[] = 'Disallow: /'; + $out[] = '# Block peer39'; + $out[] = 'User-agent: peer39_crawler'; + $out[] = 'User-agent: peer39_crawler/1.0'; + $out[] = 'Disallow: /'; + $out[] = '# Block SemRushBot'; + $out[] = 'User-agent: SemrushBot'; + $out[] = 'Disallow: /'; + $out[] = '# Block AhrefsBot'; + $out[] = 'User-agent: AhrefsBot'; + $out[] = 'Disallow: /'; + $out[] = '# See https://revi.xyz/robots.txt for rationales'; + $out[] = 'User-agent: TurnitinBot'; + $out[] = 'Disallow: /'; + $out[] = 'User-agent: NPBot'; + $out[] = 'Disallow: /'; + $out[] = 'User-agent: SlySearch'; + $out[] = 'Disallow: /'; + $out[] = 'User-agent: BLEXBot'; + $out[] = 'Disallow: /'; + $out[] = 'User-agent: BrandVerity/1.0'; + $out[] = 'Disallow: /'; // Crawl-delay entries at the bottom // Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129 $out[] = 'User-agent: *'; @@ -111,7 +130,8 @@ public function processRequest() { return id(new AphrontPlainTextResponse()) ->setContent($content) ->setCacheDurationInSeconds(phutil_units('2 hours in seconds')) + ->setClientIDCookie(false) ->setCanCDN(true) - ->setLastModified(1716404880); + ->setLastModified(1718533140); } }