Phorge(robots.txt): add Applebot-Extended to the disallow list

Summary:
Ref: https://support.apple.com/en-us/119829#datausage

Signed-off-by: Yongmin Hong <revi@omglol.email>

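Test Plan: Verify that the User-agent token emitted in robots.txt (Applebot-Extended) matches the one documented by Apple at the Ref link above.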

Reviewers: O1 revi & automations, revi

Reviewed By: O1 revi & automations, revi

Differential Revision: https://issuetracker.revi.xyz/D508
This commit is contained in:
revi 2024-07-03 23:23:56 +09:00
parent 215a7a5beb
commit 8084acebfa
Signed by: revi
GPG key ID: 1EB4F6CEEA100E94

View file

@ -17,7 +17,7 @@ public function processRequest() {
// Version timestamp is when I started editing them. // Version timestamp is when I started editing them.
// Edit setLastModified at the bottom as well. // Edit setLastModified at the bottom as well.
// Calculate EpochTime via go/epoch // Calculate EpochTime via go/epoch
$out[] = '# version: 20240621T160501+0900'; $out[] = '# version: 20240703T230700+0900';
$out[] = '# also at https://github.com/revi/sandbox.git'; $out[] = '# also at https://github.com/revi/sandbox.git';
$out[] = 'User-Agent: *'; $out[] = 'User-Agent: *';
$out[] = 'Disallow: /diffusion/'; $out[] = 'Disallow: /diffusion/';
@ -101,6 +101,10 @@ public function processRequest() {
$out[] = '# Ref: https://issuetracker.revi.xyz/u/googleextended'; $out[] = '# Ref: https://issuetracker.revi.xyz/u/googleextended';
$out[] = 'User-agent: Google-Extended'; $out[] = 'User-agent: Google-Extended';
$out[] = 'Disallow: /'; $out[] = 'Disallow: /';
$out[] = '# Apple AI stuff';
$out[] = '# Ref: https://support.apple.com/en-us/119829#datausage';
$out[] = 'User-agent: Applebot-Extended';
$out[] = 'Disallow: /';
$out[] = '# CCBot (ab)used to train LLMs'; $out[] = '# CCBot (ab)used to train LLMs';
$out[] = '# Ref: https://darkvisitors.com/agents/ccbot'; $out[] = '# Ref: https://darkvisitors.com/agents/ccbot';
$out[] = 'User-agent: CCBot'; $out[] = 'User-agent: CCBot';
@ -180,6 +184,6 @@ public function processRequest() {
->setCacheDurationInSeconds(phutil_units('2 hours in seconds')) ->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
// ->setClientIDCookie(false) (Doesn't work /shrug) // ->setClientIDCookie(false) (Doesn't work /shrug)
->setCanCDN(true) ->setCanCDN(true)
->setLastModified(1718953501); ->setLastModified(1720015620);
} }
} }