meta: various changes
Summary: Submitting three changes at once out of laziness: meta: config cleanups Add editorconfig, gitignore, and conform to them. RobotsTxt: add differential rule and conform to the editorconfig time.php: add metadata, editorconfig conformance Signed-off-by: Yongmin Hong <revi@omglol.email> Test Plan: `code .` and edit files. for php files, deploy. Reviewers: #acl_sudoers, #blessed_reviewers, revi Reviewed By: #acl_sudoers, #blessed_reviewers, revi Differential Revision: https://issuetracker.revi.xyz/D8
This commit is contained in:
parent
d2f7806542
commit
0e1656da0c
6 changed files with 223 additions and 101 deletions
32
.editorconfig
Normal file
32
.editorconfig
Normal file
|
@ -0,0 +1,32 @@
|
|||
# EditorConfig is awesome: https://EditorConfig.org
|
||||
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
end_of_line = lf
|
||||
indent_size = tab
|
||||
indent_style = tab
|
||||
insert_final_newline = true
|
||||
tab_width = 4
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
[*.md]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# Tabs may not be valid YAML
|
||||
# @see https://yaml.org/spec/1.2/spec.html#id2777534
|
||||
[*.{yml,yaml}]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
# yaml-lint configuration
|
||||
# .yamllint is itself a YAML file, so it should be indented with spaces
|
||||
[.yamllint]
|
||||
indent_style = space
|
||||
indent_size = 2
|
||||
|
||||
[.git/**]
|
||||
indent_style = space
|
||||
indent_size = 2
|
78
.gitignore
vendored
Normal file
78
.gitignore
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
# Created by https://www.toptal.com/developers/gitignore/api/linux,macos,windows
|
||||
# Edit at https://www.toptal.com/developers/gitignore?templates=linux,macos,windows
|
||||
|
||||
### Linux ###
|
||||
*~
|
||||
|
||||
# temporary files which can be created if a process still has a handle open of a deleted file
|
||||
.fuse_hidden*
|
||||
|
||||
# KDE directory preferences
|
||||
.directory
|
||||
|
||||
# Linux trash folder which might appear on any partition or disk
|
||||
.Trash-*
|
||||
|
||||
# .nfs files are created when an open file is removed but is still being accessed
|
||||
.nfs*
|
||||
|
||||
### macOS ###
|
||||
# General
|
||||
.DS_Store
|
||||
.AppleDouble
|
||||
.LSOverride
|
||||
|
||||
# Icon must end with two \r
|
||||
Icon
|
||||
|
||||
|
||||
# Thumbnails
|
||||
._*
|
||||
|
||||
# Files that might appear in the root of a volume
|
||||
.DocumentRevisions-V100
|
||||
.fseventsd
|
||||
.Spotlight-V100
|
||||
.TemporaryItems
|
||||
.Trashes
|
||||
.VolumeIcon.icns
|
||||
.com.apple.timemachine.donotpresent
|
||||
|
||||
# Directories potentially created on remote AFP share
|
||||
.AppleDB
|
||||
.AppleDesktop
|
||||
Network Trash Folder
|
||||
Temporary Items
|
||||
.apdisk
|
||||
|
||||
### macOS Patch ###
|
||||
# iCloud generated files
|
||||
*.icloud
|
||||
|
||||
### Windows ###
|
||||
# Windows thumbnail cache files
|
||||
Thumbs.db
|
||||
Thumbs.db:encryptable
|
||||
ehthumbs.db
|
||||
ehthumbs_vista.db
|
||||
|
||||
# Dump file
|
||||
*.stackdump
|
||||
|
||||
# Folder config file
|
||||
[Dd]esktop.ini
|
||||
|
||||
# Recycle Bin used on file shares
|
||||
$RECYCLE.BIN/
|
||||
|
||||
# Windows Installer files
|
||||
*.cab
|
||||
*.msi
|
||||
*.msix
|
||||
*.msm
|
||||
*.msp
|
||||
|
||||
# Windows shortcuts
|
||||
*.lnk
|
||||
|
||||
# End of https://www.toptal.com/developers/gitignore/api/linux,macos,windows
|
|
@ -2,104 +2,111 @@
|
|||
|
||||
abstract class PhabricatorRobotsController extends PhabricatorController {
|
||||
|
||||
public function shouldRequireLogin() {
|
||||
return false;
|
||||
public function shouldRequireLogin() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public function setClientIDCookie() {
|
||||
return false;
|
||||
}
|
||||
|
||||
public function processRequest() {
|
||||
$out = array();
|
||||
$out = array();
|
||||
|
||||
$out[] = '# Forked from phabricator.wikimedia.org, we.phorge.it';
|
||||
$out[] = '# Adapted from phabricator.wikimedia.org, we.phorge.it';
|
||||
// Version timestamp is when I started editing them.
|
||||
// Edit setLastModified at the bottom as well.
|
||||
// Calculate EpochTime via go/ZoneStamp
|
||||
$out[] = '# version: 20240430T032700+0900';
|
||||
$out[] = '# also at https://github.com/revi/sandbox.git';
|
||||
$out[] = 'User-Agent: *';
|
||||
$out[] = 'Disallow: /diffusion/';
|
||||
$out[] = 'Disallow: /source/';
|
||||
$out[] = 'Disallow: /multimeter/';
|
||||
$out[] = 'Disallow: /policy/explain';
|
||||
$out[] = 'Disallow: /auth';
|
||||
$out[] = 'Disallow: /login';
|
||||
$out[] = 'Disallow: /maniphest/transaction';
|
||||
$out[] = 'Disallow: /tag';
|
||||
$out[] = 'Disallow: /search/query/all';
|
||||
$out[] = 'Disallow: /conduit';
|
||||
$out[] = 'Disallow: /api';
|
||||
$out[] = 'Disallow: /project';
|
||||
$out[] = 'Disallow: /applications';
|
||||
$out[] = 'Disallow: /token';
|
||||
$out[] = 'Disallow: /pholio';
|
||||
$out[] = 'Disallow: /dashboard';
|
||||
$out[] = 'Disallow: /calendar';
|
||||
$out[] = 'Disallow: /herald';
|
||||
// This is commits.
|
||||
$out[] = 'Disallow: /r*';
|
||||
// This is Files. (F$)
|
||||
$out[] = 'Disallow: /file';
|
||||
$out[] = 'Disallow: /F*%24*';
|
||||
// This is pastes (P$)
|
||||
$out[] = 'Disallow: /paste';
|
||||
$out[] = 'Disallow: /P*%24*';
|
||||
// This is blog entries (J$)
|
||||
$out[] = 'Disallow: /phame';
|
||||
$out[] = 'Disallow: /J*%24*';
|
||||
// This is user list.
|
||||
// As of 2024-04-17 user list is behind auth but who knows it might change?
|
||||
$out[] = 'Disallow: /people';
|
||||
// This is user profile link.
|
||||
$out[] = 'Disallow: /p/';
|
||||
// Phorge specific entries end here.
|
||||
$out[] = '# This is cloudflare endpoint';
|
||||
$out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/';
|
||||
$out[] = 'Disallow: /cdn-cgi/';
|
||||
$out[] = '# Google Ads are not welcome';
|
||||
$out[] = 'User-agent: Mediapartners-Google';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: AdsBot-Google';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: AdsBot-Google-Mobile';
|
||||
$out[] = 'Disallow: /';
|
||||
// While I sometimes (borderline 'rare') use LLMs (GPT, Gemini, …), I'd rather prefer LLMs not use my stuff to profit
|
||||
// Well I think my stuff is mostly out of interest for them, tho…
|
||||
$out[] = '# ChatGPT Crawlers are not welcome';
|
||||
$out[] = '# Ref: https://platform.openai.com/docs/plugins/bot';
|
||||
$out[] = 'User-agent: ChatGPT-User';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: GPTBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Google Gemini AI Crawlers are also not welcome';
|
||||
$out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended';
|
||||
$out[] = 'User-agent: Google-Extended';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# CCBot (ab)used to train LLMs';
|
||||
$out[] = '# Ref: https://darkvisitors.com/agents/ccbot';
|
||||
$out[] = 'User-agent: CCBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Facebook LLM Bot';
|
||||
$out[] = '# Ref: https://developers.facebook.com/docs/sharing/bot/';
|
||||
$out[] = 'User-agent: FacebookBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# DiffBot, though this one is known to have option to ignore robotstxt';
|
||||
$out[] = '# Ref https://docs.diffbot.com/docs/why-is-my-crawl-not-crawling-and-other-uncommon-crawl-problems';
|
||||
$out[] = 'User-agent: Diffbot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Bytespider';
|
||||
$out[] = '# Ref: https://darkvisitors.com/agents/bytespider';
|
||||
$out[] = 'User-agent: Bytespider';
|
||||
$out[] = 'Disallow: /';
|
||||
// Crawl-delay entries at the bottom
|
||||
// Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129
|
||||
$out[] = 'User-agent: *';
|
||||
$out[] = 'Crawl-delay: 1';
|
||||
$out[] = '# version: 20240502T184200+0900';
|
||||
$out[] = '# also at https://github.com/revi/sandbox.git';
|
||||
$out[] = 'User-Agent: *';
|
||||
$out[] = 'Disallow: /diffusion/';
|
||||
$out[] = 'Disallow: /source/';
|
||||
$out[] = 'Disallow: /multimeter/';
|
||||
$out[] = 'Disallow: /policy/explain';
|
||||
$out[] = 'Disallow: /auth';
|
||||
$out[] = 'Disallow: /login';
|
||||
$out[] = 'Disallow: /maniphest/transaction';
|
||||
$out[] = 'Disallow: /tag';
|
||||
$out[] = 'Disallow: /search/query/all';
|
||||
$out[] = 'Disallow: /conduit';
|
||||
$out[] = 'Disallow: /api';
|
||||
$out[] = 'Disallow: /project';
|
||||
$out[] = 'Disallow: /applications';
|
||||
$out[] = 'Disallow: /token';
|
||||
$out[] = 'Disallow: /pholio';
|
||||
$out[] = 'Disallow: /dashboard';
|
||||
$out[] = 'Disallow: /calendar';
|
||||
$out[] = 'Disallow: /herald';
|
||||
// This is commits.
|
||||
$out[] = 'Disallow: /r*';
|
||||
// This is differential reviews
|
||||
$out[] = 'Disallow: /differential';
|
||||
$out[] = 'Disallow: /D*%24*';
|
||||
// This is Files. (F$)
|
||||
$out[] = 'Disallow: /file';
|
||||
$out[] = 'Disallow: /F*%24*';
|
||||
// This is pastes (P$)
|
||||
$out[] = 'Disallow: /paste';
|
||||
$out[] = 'Disallow: /P*%24*';
|
||||
// This is blog entries (J$)
|
||||
$out[] = 'Disallow: /phame';
|
||||
$out[] = 'Disallow: /J*%24*';
|
||||
// This is user list.
|
||||
// As of 2024-04-17 user list is behind auth but who knows it might change?
|
||||
$out[] = 'Disallow: /people';
|
||||
// This is user profile link.
|
||||
$out[] = 'Disallow: /p/';
|
||||
// Phorge specific entries end here.
|
||||
$out[] = '# This is cloudflare endpoint';
|
||||
$out[] = '# Ref: https://developers.cloudflare.com/fundamentals/reference/cdn-cgi-endpoint/';
|
||||
$out[] = 'Disallow: /cdn-cgi/';
|
||||
$out[] = '# Google Ads are not welcome';
|
||||
$out[] = 'User-agent: Mediapartners-Google';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: AdsBot-Google';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: AdsBot-Google-Mobile';
|
||||
$out[] = 'Disallow: /';
|
||||
// While I sometimes (borderline 'rare') use LLMs (GPT, Gemini, …), I'd rather prefer LLMs not use my stuff to profit
|
||||
// Well I think my stuff is mostly out of interest for them, tho…
|
||||
$out[] = '# ChatGPT Crawlers are not welcome';
|
||||
$out[] = '# Ref: https://platform.openai.com/docs/plugins/bot';
|
||||
$out[] = 'User-agent: ChatGPT-User';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = 'User-agent: GPTBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Google Gemini AI Crawlers are also not welcome';
|
||||
$out[] = '# Ref: https://developers.google.com/search/docs/crawling-indexing/overview-google-crawlers?hl=en#google-extended';
|
||||
$out[] = 'User-agent: Google-Extended';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# CCBot (ab)used to train LLMs';
|
||||
$out[] = '# Ref: https://darkvisitors.com/agents/ccbot';
|
||||
$out[] = 'User-agent: CCBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Facebook LLM Bot';
|
||||
$out[] = '# Ref: https://developers.facebook.com/docs/sharing/bot/';
|
||||
$out[] = 'User-agent: FacebookBot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# DiffBot, though this one is known to have option to ignore robotstxt';
|
||||
$out[] = '# Ref https://docs.diffbot.com/docs/why-is-my-crawl-not-crawling-and-other-uncommon-crawl-problems';
|
||||
$out[] = 'User-agent: Diffbot';
|
||||
$out[] = 'Disallow: /';
|
||||
$out[] = '# Bytespider';
|
||||
$out[] = '# Ref: https://darkvisitors.com/agents/bytespider';
|
||||
$out[] = 'User-agent: Bytespider';
|
||||
$out[] = 'Disallow: /';
|
||||
// Crawl-delay entries at the bottom
|
||||
// Ref: https://github.com/otwcode/otwarchive/pull/4411#discussion_r1044351129
|
||||
$out[] = 'User-agent: *';
|
||||
$out[] = 'Crawl-delay: 1';
|
||||
|
||||
$content = implode("\n", $out)."\n";
|
||||
$content = implode("\n", $out)."\n";
|
||||
|
||||
return id(new AphrontPlainTextResponse())
|
||||
->setContent($content)
|
||||
->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
|
||||
->setCanCDN(true)
|
||||
->setLastModified(1714415220);
|
||||
}
|
||||
return id(new AphrontPlainTextResponse())
|
||||
->setContent($content)
|
||||
->setCacheDurationInSeconds(phutil_units('2 hours in seconds'))
|
||||
->setCanCDN(true)
|
||||
->setLastModified(1714642920);
|
||||
}
|
||||
}
|
17
time.php
17
time.php
|
@ -1,9 +1,13 @@
|
|||
<html>
|
||||
<html lang="en" prefix="og: https://ogp.me/ns#">
|
||||
<head>
|
||||
<title>
|
||||
Hey revi, what time is it for you?
|
||||
</title>
|
||||
<link rel="icon" href="https://r2.revicdn.net/pfp2.png">
|
||||
<meta property="og:title" content="Hey revi, what time is it for you?" />
|
||||
<meta property="og:description" content="A simple page that shows current date and time for revi." />
|
||||
<meta property="og:image" content="https://r2.revicdn.net/pfp2.png" />
|
||||
<meta property="og:url" content="https://k.revi.xyz/time.php" />
|
||||
<style>
|
||||
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono&family=IBM+Plex+Sans+KR:wght@300;400&family=Noto+Sans+KR&display=swap');
|
||||
|
||||
|
@ -13,6 +17,8 @@
|
|||
line-height: 1.6;
|
||||
font-size: 16px;
|
||||
padding: 0 10px;
|
||||
overflow-wrap: break-all;
|
||||
word-break: keep-all;
|
||||
font-family:
|
||||
'IBM Plex Sans KR',
|
||||
'Noto Sans KR',
|
||||
|
@ -26,10 +32,10 @@
|
|||
'Segoe UI Emoji';
|
||||
}
|
||||
.code {
|
||||
font-family:
|
||||
'IBM Plex Mono',
|
||||
'Courier New',
|
||||
monospace;
|
||||
font-family:
|
||||
'IBM Plex Mono',
|
||||
'Courier New',
|
||||
monospace;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
|
@ -56,7 +62,6 @@
|
|||
echo "<p>It is {$date} {$time} in <code>Asia/Seoul</code>, where <a rel='me' href='https://revi.xyz'>revi</a> lives.<br />Check in your timezone: <a href='{$zonestampLink}'>ZoneStamp</a>.</p>";
|
||||
|
||||
?>
|
||||
<br />
|
||||
<hr>
|
||||
<p><a href='https://github.com/revi/sandbox/blob/master/time.php'>Source code @ GitHub</a>. (Warning: it is far from 'clean'.) Also, please note that there is <a href='https://xkcd.com/1179/'>only one correct way to write dates</a>.</p>
|
||||
</body>
|
||||
|
|
Loading…
Reference in a new issue