PhabExt(robots.txt): add more rules, ...

Summary:
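Remove the setClientIDCookie() override from the controller and call setClientIDCookie(false) on the returned Response instead; add more crawler block rules; add a TODO to serve different content for CDN domains.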

Signed-off-by: Yongmin Hong <revi@omglol.email>

Test Plan: deploy?

Reviewers: O1 revi & automations, revi

Reviewed By: O1 revi & automations, revi

Differential Revision: https://issuetracker.revi.xyz/D338
This commit is contained in:
revi 2024-06-17 12:40:01 +09:00
parent 9afee876a5
commit 68f2133e9c
Signed by: revi
GPG key ID: 1EB4F6CEEA100E94

View file

@ -8,9 +8,7 @@ public function shouldRequireLogin() {
return false; return false;
} }
public function setClientIDCookie() { // TODO: Different content for cdn domains
return false;
}
public function processRequest() { public function processRequest() {
$out = array(); $out = array();
@ -19,7 +17,7 @@ public function processRequest() {
// Version timestamp is when I started editing them. // Version timestamp is when I started editing them.
// Edit setLastModified at the bottom as well. // Edit setLastModified at the bottom as well.
// Calculate EpochTime via go/epoch // Calculate EpochTime via go/epoch
$out[] = '# version: 20240523T040800+0900'; $out[] = '# version: 20240616T191900+0900';
$out[] = '# also at https://github.com/revi/sandbox.git'; $out[] = '# also at https://github.com/revi/sandbox.git';
$out[] = 'User-Agent: *'; $out[] = 'User-Agent: *';
$out[] = 'Disallow: /diffusion/'; $out[] = 'Disallow: /diffusion/';
@ -101,6 +99,27 @@ public function processRequest() {
$out[] = '# Block PetalBot, misbehaving'; $out[] = '# Block PetalBot, misbehaving';
$out[] = 'User-agent: PetalBot'; $out[] = 'User-agent: PetalBot';
$out[] = 'Disallow: /'; $out[] = 'Disallow: /';
$out[] = '# Block peer39';
$out[] = 'User-agent: peer39_crawler';
$out[] = 'User-agent: peer39_crawler/1.0';
$out[] = 'Disallow: /';
$out[] = '# Block SemRushBot';
$out[] = 'User-agent: SemrushBot';
$out[] = 'Disallow: /';
$out[] = '# Block AhrefsBot';
$out[] = 'User-agent: AhrefsBot';
$out[] = 'Disallow: /';
$out[] = '# See https://revi.xyz/robots.txt for rationales';
$out[] = 'User-agent: TurnitinBot';
$out[] = 'Disallow: /';
$out[] = 'User-agent: NPBot';
$out[] = 'Disallow: /';
$out[] = 'User-agent: SlySearch';
$out[] = 'Disallow: /';
$out[] = 'User-agent: BLEXBot';
$out[] = 'Disallow: /';
$out[] = 'User-agent: BrandVerity/1.0';
$out[] = 'Disallow: /';
// Crawl-delay entries at the bottom // Crawl-delay entries at the bottom
// Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129 // Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129
$out[] = 'User-agent: *'; $out[] = 'User-agent: *';
@ -111,7 +130,8 @@ public function processRequest() {
return id(new AphrontPlainTextResponse()) return id(new AphrontPlainTextResponse())
->setContent($content) ->setContent($content)
->setCacheDurationInSeconds(phutil_units('2 hours in seconds')) ->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
->setClientIDCookie(false)
->setCanCDN(true) ->setCanCDN(true)
->setLastModified(1716404880); ->setLastModified(1718533140);
} }
} }