From cb20e4d79509287f8e91af3804f8a304fa305196 Mon Sep 17 00:00:00 2001 From: Yongmin Hong Date: Wed, 17 Apr 2024 01:35:10 +0900 Subject: [PATCH] RobotsTxt: add more rules Interest of nobody. Bug: N/A Signed-off-by: Yongmin Hong --- PhabExt/PhabricatorCustomRobotsTxtController.php | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/PhabExt/PhabricatorCustomRobotsTxtController.php b/PhabExt/PhabricatorCustomRobotsTxtController.php index 7c54273..eab79ee 100644 --- a/PhabExt/PhabricatorCustomRobotsTxtController.php +++ b/PhabExt/PhabricatorCustomRobotsTxtController.php @@ -10,7 +10,8 @@ public function processRequest() { $out = array(); $out[] = '# Forked from phabricator.wikimedia.org, we.phorge.it'; - $out[] = '# version: 20240416T211100+0900'; + // Version timestamp is when I started editing them. + $out[] = '# version: 20240417T011800+0900'; $out[] = '# also at https://github.com/revi/sandbox.git'; $out[] = 'User-Agent: *'; $out[] = 'Disallow: /diffusion/'; @@ -33,8 +34,19 @@ public function processRequest() { $out[] = 'Disallow: /dashboard'; $out[] = 'Disallow: /calendar'; $out[] = 'Disallow: /herald'; + // This is commits. $out[] = 'Disallow: /r*'; + // This is pastes (P$) $out[] = 'Disallow: /P*%24*'; + $out[] = 'Disallow: /phame'; + // This is blog entries (J$) + $out[] = 'Disallow: /J*%24*'; + // This is user list. + // As of 2024-04-17 user list is behind auth but who knows it might change? + $out[] = 'Disallow: /people'; + // This is user profile link. + $out[] = 'Disallow: /p/'; + // Phorge specific entries end here. 
$out[] = '# This is cloudflare endpoint'; $out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/'; $out[] = 'Disallow: /cdn-cgi/'; @@ -55,6 +67,8 @@ public function processRequest() { $out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended'; $out[] = 'User-agent: Google-Extended'; $out[] = 'Disallow: /'; + // Crawl-delay entries at the bottom + // Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129 $out[] = 'User-agent: *'; $out[] = 'Crawl-delay: 1';