From a257d3ccb4615030c33e5a80db18507c848ae037 Mon Sep 17 00:00:00 2001 From: Yongmin Hong Date: Fri, 10 May 2024 18:18:36 +0900 Subject: [PATCH] robots.txt: shorten some links Summary: Less size /shrug Signed-off-by: Yongmin Hong Test Plan: try visiting URLs to test redirection Reviewers: O1 revi & automations, revi Reviewed By: O1 revi & automations, revi Differential Revision: https://issuetracker.revi.xyz/D66 --- PhabExt/PhabricatorCustomRobotsTxtController.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/PhabExt/PhabricatorCustomRobotsTxtController.php b/PhabExt/PhabricatorCustomRobotsTxtController.php index c2fd533..8c1b6f7 100644 --- a/PhabExt/PhabricatorCustomRobotsTxtController.php +++ b/PhabExt/PhabricatorCustomRobotsTxtController.php @@ -1,4 +1,6 @@ +// SPDX-License-Identifier: Apache-2.0 abstract class PhabricatorRobotsController extends PhabricatorController { @@ -16,8 +18,8 @@ public function processRequest() { $out[] = '# Adapted from phabricator.wikimedia.org, we.phorge.it'; // Version timestamp is when I started editing them. // Edit setLastModified at the bottom as well. - // Calculate EpochTime via go/ZoneStamp - $out[] = '# version: 20240502T184200+0900'; + // Calculate EpochTime via go/epoch + $out[] = '# version: 20240509T235513+0900'; $out[] = '# also at https://github.com/revi/sandbox.git'; $out[] = 'User-Agent: *'; $out[] = 'Disallow: /diffusion/'; @@ -59,7 +61,7 @@ public function processRequest() { $out[] = 'Disallow: /p/'; // Phorge specific entries end here. $out[] = '# This is cloudflare endpoint'; - $out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/'; + $out[] = '# Ref: https://issuetracker.revi.xyz/u/cloudflarecdncgi'; $out[] = 'Disallow: /cdn-cgi/'; $out[] = '# Google Ads are not welcome'; $out[] = 'User-agent: Mediapartners-Google'; @@ -77,7 +79,7 @@ public function processRequest() { $out[] = 'User-agent: GPTBot'; $out[] = 'Disallow: /'; $out[] = '# Google Gemini AI Crawlers are also not welcome'; - $out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended'; + $out[] = '# Ref: https://issuetracker.revi.xyz/u/googleextended'; $out[] = 'User-agent: Google-Extended'; $out[] = 'Disallow: /'; $out[] = '# CCBot (ab)used to train LLMs'; @@ -89,7 +91,7 @@ public function processRequest() { $out[] = 'User-agent: FacebookBot'; $out[] = 'Disallow: /'; $out[] = '# DiffBot, though this one is known to have option to ignore robotstxt'; - $out[] = '# Ref https://docs.diffbot.com/docs/why-is-my-crawl-not-crawling-and-other-uncommon-crawl-problems'; + $out[] = '# Ref https://issuetracker.revi.xyz/u/robotstxtdiffbot'; $out[] = 'User-agent: Diffbot'; $out[] = 'Disallow: /'; $out[] = '# Bytespider'; @@ -107,6 +109,6 @@ public function processRequest() { ->setContent($content) ->setCacheDurationInSeconds(phutil_units('2 hours in seconds')) ->setCanCDN(true) - ->setLastModified(1714642920); + ->setLastModified(1715266513); } }