meta: various changes

Summary:
Submitting three changes at once out of laziness:

meta: config cleanups

  Add editorconfig, gitignore, and conform to them.

RobotsTxt: add differential rule and conform to the editorconfig

time.php: add metadata, editorconfig conformance

Signed-off-by: Yongmin Hong <revi@omglol.email>

Test Plan: Run `code .` and edit files. For PHP files, deploy.

Reviewers: #acl_sudoers, #blessed_reviewers, revi

Reviewed By: #acl_sudoers, #blessed_reviewers, revi

Differential Revision: https://issuetracker.revi.xyz/D8
This commit is contained in:
revi 2024-05-02 19:25:47 +09:00
parent d2f7806542
commit 0e1656da0c
Signed by: revi
GPG key ID: 1EB4F6CEEA100E94
6 changed files with 223 additions and 101 deletions

32
.editorconfig Normal file
View file

@ -0,0 +1,32 @@
# EditorConfig is awesome: https://EditorConfig.org
root = true
[*]
charset = utf-8
end_of_line = lf
indent_size = tab
indent_style = tab
insert_final_newline = true
tab_width = 4
trim_trailing_whitespace = true
[*.md]
indent_style = space
indent_size = 2
# Tabs may not be valid YAML
# @see https://yaml.org/spec/1.2/spec.html#id2777534
[*.{yml,yaml}]
indent_style = space
indent_size = 2
# yaml-lint configuration
# .yamllint is itself a YAML file, so it must be indented with spaces
[.yamllint]
indent_style = space
indent_size = 2
[.git/**]
indent_style = space
indent_size = 2

78
.gitignore vendored Normal file
View file

@ -0,0 +1,78 @@
# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### macOS ###
# General
.DS_Store
.AppleDouble
.LSOverride
# Icon must end with two \r
Icon
# Thumbnails
._*
# Files that might appear in the root of a volume
.DocumentRevisions-V100
.fseventsd
.Spotlight-V100
.TemporaryItems
.Trashes
.VolumeIcon.icns
.com.apple.timemachine.donotpresent
# Directories potentially created on remote AFP share
.AppleDB
.AppleDesktop
Network Trash Folder
Temporary Items
.apdisk
### macOS Patch ###
# iCloud generated files
*.icloud
### Windows ###
# Windows thumbnail cache files
Thumbs.db
Thumbs.db:encryptable
ehthumbs.db
ehthumbs_vista.db
# Dump file
*.stackdump
# Folder config file
[Dd]esktop.ini
# Recycle Bin used on file shares
$RECYCLE.BIN/
# Windows Installer files
*.cab
*.msi
*.msix
*.msm
*.msp
# Windows shortcuts
*.lnk
# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows

View file

@ -2,104 +2,111 @@
abstract class PhabricatorRobotsController extends PhabricatorController {
public function shouldRequireLogin() {
return false;
public function shouldRequireLogin() {
return false;
}
public function setClientIDCookie() {
return false;
}
public function processRequest() {
$out = array();
$out = array();
$out[] = '# Forked from phabricator.wikimedia.org, we.phorge.it';
$out[] = '# Adapted from phabricator.wikimedia.org, we.phorge.it';
// Version timestamp is when I started editing them.
// Edit setLastModified at the bottom as well.
// Calculate EpochTime via go/ZoneStamp
$out[] = '# version: 20240430T032700+0900';
$out[] = '# also at https://github.com/revi/sandbox.git';
$out[] = 'User-Agent: *';
$out[] = 'Disallow: /diffusion/';
$out[] = 'Disallow: /source/';
$out[] = 'Disallow: /multimeter/';
$out[] = 'Disallow: /policy/explain';
$out[] = 'Disallow: /auth';
$out[] = 'Disallow: /login';
$out[] = 'Disallow: /maniphest/transaction';
$out[] = 'Disallow: /tag';
$out[] = 'Disallow: /search/query/all';
$out[] = 'Disallow: /conduit';
$out[] = 'Disallow: /api';
$out[] = 'Disallow: /project';
$out[] = 'Disallow: /applications';
$out[] = 'Disallow: /token';
$out[] = 'Disallow: /pholio';
$out[] = 'Disallow: /dashboard';
$out[] = 'Disallow: /calendar';
$out[] = 'Disallow: /herald';
// This is commits.
$out[] = 'Disallow: /r*';
// This is Files. (F$)
$out[] = 'Disallow: /file';
$out[] = 'Disallow: /F*%24*';
// This is pastes (P$)
$out[] = 'Disallow: /paste';
$out[] = 'Disallow: /P*%24*';
// This is blog entries (J$)
$out[] = 'Disallow: /phame';
$out[] = 'Disallow: /J*%24*';
// This is user list.
// As of 2024-04-17 user list is behind auth but who knows it might change?
$out[] = 'Disallow: /people';
// This is user profile link.
$out[] = 'Disallow: /p/';
// Phorge specific entries end here.
$out[] = '# This is cloudflare endpoint';
$out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/';
$out[] = 'Disallow: /cdn-cgi/';
$out[] = '# Google Ads are not welcome';
$out[] = 'User-agent: Mediapartners-Google';
$out[] = 'Disallow: /';
$out[] = 'User-agent: AdsBot-Google';
$out[] = 'Disallow: /';
$out[] = 'User-agent: AdsBot-Google-Mobile';
$out[] = 'Disallow: /';
// While I sometimes (borderline 'rare') use LLMs (GPT, Gemini, …), I'd rather prefer LLMs not use my stuff to profit
// Well I think my stuff is mostly out of interest for them, tho…
$out[] = '# ChatGPT Crawlers are not welcome';
$out[] = '# Ref: https://platform.openai.com/docs/plugins/bot';
$out[] = 'User-agent: ChatGPT-User';
$out[] = 'Disallow: /';
$out[] = 'User-agent: GPTBot';
$out[] = 'Disallow: /';
$out[] = '# Google Gemini AI Crawlers are also not welcome';
$out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended';
$out[] = 'User-agent: Google-Extended';
$out[] = 'Disallow: /';
$out[] = '# CCBot (ab)used to train LLMs';
$out[] = '# Ref: https://darkvisitors.com/agents/ccbot';
$out[] = 'User-agent: CCBot';
$out[] = 'Disallow: /';
$out[] = '# Facebook LLM Bot';
$out[] = '# Ref: https://developers.facebook.com/docs/sharing/bot/';
$out[] = 'User-agent: FacebookBot';
$out[] = 'Disallow: /';
$out[] = '# DiffBot, though this one is known to have option to ignore robotstxt';
$out[] = '# Ref https://docs.diffbot.com/docs/why-is-my-crawl-not-crawling-and-other-uncommon-crawl-problems';
$out[] = 'User-agent: Diffbot';
$out[] = 'Disallow: /';
$out[] = '# Bytespider';
$out[] = '# Ref: https://darkvisitors.com/agents/bytespider';
$out[] = 'User-agent: Bytespider';
$out[] = 'Disallow: /';
// Crawl-delay entries at the bottom
// Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129
$out[] = 'User-agent: *';
$out[] = 'Crawl-delay: 1';
$out[] = '# version: 20240502T184200+0900';
$out[] = '# also at https://github.com/revi/sandbox.git';
$out[] = 'User-Agent: *';
$out[] = 'Disallow: /diffusion/';
$out[] = 'Disallow: /source/';
$out[] = 'Disallow: /multimeter/';
$out[] = 'Disallow: /policy/explain';
$out[] = 'Disallow: /auth';
$out[] = 'Disallow: /login';
$out[] = 'Disallow: /maniphest/transaction';
$out[] = 'Disallow: /tag';
$out[] = 'Disallow: /search/query/all';
$out[] = 'Disallow: /conduit';
$out[] = 'Disallow: /api';
$out[] = 'Disallow: /project';
$out[] = 'Disallow: /applications';
$out[] = 'Disallow: /token';
$out[] = 'Disallow: /pholio';
$out[] = 'Disallow: /dashboard';
$out[] = 'Disallow: /calendar';
$out[] = 'Disallow: /herald';
// This is commits.
$out[] = 'Disallow: /r*';
// This is differential reviews
$out[] = 'Disallow: /differential';
$out[] = 'Disallow: /D*%24*';
// This is Files. (F$)
$out[] = 'Disallow: /file';
$out[] = 'Disallow: /F*%24*';
// This is pastes (P$)
$out[] = 'Disallow: /paste';
$out[] = 'Disallow: /P*%24*';
// This is blog entries (J$)
$out[] = 'Disallow: /phame';
$out[] = 'Disallow: /J*%24*';
// This is user list.
// As of 2024-04-17 user list is behind auth but who knows it might change?
$out[] = 'Disallow: /people';
// This is user profile link.
$out[] = 'Disallow: /p/';
// Phorge specific entries end here.
$out[] = '# This is cloudflare endpoint';
$out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/';
$out[] = 'Disallow: /cdn-cgi/';
$out[] = '# Google Ads are not welcome';
$out[] = 'User-agent: Mediapartners-Google';
$out[] = 'Disallow: /';
$out[] = 'User-agent: AdsBot-Google';
$out[] = 'Disallow: /';
$out[] = 'User-agent: AdsBot-Google-Mobile';
$out[] = 'Disallow: /';
// While I sometimes (borderline 'rare') use LLMs (GPT, Gemini, …), I'd rather prefer LLMs not use my stuff to profit
// Well I think my stuff is mostly out of interest for them, tho…
$out[] = '# ChatGPT Crawlers are not welcome';
$out[] = '# Ref: https://platform.openai.com/docs/plugins/bot';
$out[] = 'User-agent: ChatGPT-User';
$out[] = 'Disallow: /';
$out[] = 'User-agent: GPTBot';
$out[] = 'Disallow: /';
$out[] = '# Google Gemini AI Crawlers are also not welcome';
$out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended';
$out[] = 'User-agent: Google-Extended';
$out[] = 'Disallow: /';
$out[] = '# CCBot (ab)used to train LLMs';
$out[] = '# Ref: https://darkvisitors.com/agents/ccbot';
$out[] = 'User-agent: CCBot';
$out[] = 'Disallow: /';
$out[] = '# Facebook LLM Bot';
$out[] = '# Ref: https://developers.facebook.com/docs/sharing/bot/';
$out[] = 'User-agent: FacebookBot';
$out[] = 'Disallow: /';
$out[] = '# DiffBot, though this one is known to have option to ignore robotstxt';
$out[] = '# Ref https://docs.diffbot.com/docs/why-is-my-crawl-not-crawling-and-other-uncommon-crawl-problems';
$out[] = 'User-agent: Diffbot';
$out[] = 'Disallow: /';
$out[] = '# Bytespider';
$out[] = '# Ref: https://darkvisitors.com/agents/bytespider';
$out[] = 'User-agent: Bytespider';
$out[] = 'Disallow: /';
// Crawl-delay entries at the bottom
// Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129
$out[] = 'User-agent: *';
$out[] = 'Crawl-delay: 1';
$content = implode("\n", $out)."\n";
$content = implode("\n", $out)."\n";
return id(new AphrontPlainTextResponse())
->setContent($content)
->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
->setCanCDN(true)
->setLastModified(1714415220);
}
return id(new AphrontPlainTextResponse())
->setContent($content)
->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
->setCanCDN(true)
->setLastModified(1714642920);
}
}

View file

@ -1,9 +1,13 @@
<html>
<html lang="en" prefix="og: https://ogp.me/ns#">
<head>
<title>
Hey revi, what time is it for you?
</title>
<link rel="icon" href="https://r2.revicdn.net/pfp2.png">
<meta property="og:title" content="Hey revi, what time is it for you?" />
<meta property="og:description" content="A simple page that shows current date and time for revi." />
<meta property="og:image" content="https://r2.revicdn.net/pfp2.png" />
<meta property="og:url" content="https://k.revi.xyz/time.php" />
<style>
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono&family=IBM+Plex+Sans+KR:wght@300;400&family=Noto+Sans+KR&display=swap');
@ -13,6 +17,8 @@
line-height: 1.6;
font-size: 16px;
padding: 0 10px;
overflow-wrap: break-all;
word-break: keep-all;
font-family:
'IBM Plex Sans KR',
'Noto Sans KR',
@ -26,10 +32,10 @@
'Segoe UI Emoji';
}
.code {
font-family:
'IBM Plex Mono',
'Courier New',
monospace;
font-family:
'IBM Plex Mono',
'Courier New',
monospace;
}
</style>
</head>
@ -56,7 +62,6 @@
echo "<p>It is {$date} {$time} in <code>Asia/Seoul</code>, where <a rel='me' href='https://revi.xyz'>revi</a> lives.<br />Check in your timezone: <a href='{$zonestampLink}'>ZoneStamp</a>.</p>";
?>
<br />
<hr>
<p><a href='https://github.com/revi/sandbox/blob/master/time.php'>Source code @ GitHub</a>. (Warning: it is far from 'clean'.) Also, please note that there is <a href='https://xkcd.com/1179/'>only one correct way to write dates</a>.</p>
</body>