<?php

/**
 * Collects old logs and caches to reduce the amount of data stored in the
 * database.
 *
 * @group daemon
 */
final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {

  /**
   * Main daemon loop: run every collector once per pass, log what was
   * collected, then either ease off (when caught up) or keep working (when
   * any collector hit its per-pass row limit).
   */
  public function run() {
    do {
      // PHP evaluates array literal elements in order, so the collectors
      // still execute in this exact sequence.
      $counts = array(
        'Herald Transcript' => $this->collectHeraldTranscripts(),
        'Daemon Log' => $this->collectDaemonLogs(),
        'Differential Parse Cache' => $this->collectParseCaches(),
        'Markup Cache' => $this->collectMarkupCaches(),
        'Archived Tasks' => $this->collectArchivedTasks(),
        'General Cache TTL' => $this->collectGeneralCacheTTL(),
        'General Cache Entries' => $this->collectGeneralCaches(),
        'Temporary Files' => $this->collectExpiredFiles(),
        'Conduit Logs' => $this->collectExpiredConduitLogs(),
        'Conduit Connections' => $this->collectExpiredConduitConnections(),
      );

      // Only report collectors which actually removed something.
      $counts = array_filter($counts);

      foreach ($counts as $thing => $count) {
        $count = number_format($count);
        $this->log("Garbage collected {$count} '{$thing}' objects.");
      }

      if (array_sum($counts) < 100) {
        // We didn't max out any of the GCs so we're basically caught up. Ease
        // off the GC loop so we don't keep doing table scans just to delete
        // a handful of rows; wake up in a few hours.
        $this->sleep(4 * (60 * 60));
      } else {
        $this->stillWorking();
      }
    } while (true);
  }


  /**
   * Blank out the bulky transcript fields on old Herald transcripts rather
   * than deleting the rows, so the transcript records themselves survive.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectHeraldTranscripts() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.herald-transcripts');
    if ($ttl <= 0) {
      // A nonpositive TTL disables this collector.
      return 0;
    }

    $transcript = new HeraldTranscript();
    $conn_w = $transcript->establishConnection('w');

    queryfx(
      $conn_w,
      'UPDATE %T SET
          objectTranscript = "",
          ruleTranscripts = "",
          conditionTranscripts = "",
          applyTranscripts = "",
          garbageCollected = 1
        WHERE garbageCollected = 0 AND `time` < %d
        LIMIT 100',
      $transcript->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete daemon log events older than the configured TTL.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectDaemonLogs() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.daemon-logs');
    if ($ttl <= 0) {
      return 0;
    }

    $event = new PhabricatorDaemonLogEvent();
    $conn_w = $event->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE epoch < %d LIMIT 100',
      $event->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete old rows from the Differential parse cache side table.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectParseCaches() {
    $ttl = PhabricatorEnv::getEnvConfig(
      'gcdaemon.ttl.differential-parse-cache');
    if ($ttl <= 0) {
      return 0;
    }

    $changeset = new DifferentialChangeset();
    $conn_w = $changeset->establishConnection('w');

    // NOTE: this deletes from the dedicated cache table, not the changeset
    // table itself.
    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d LIMIT 100',
      DifferentialChangeset::TABLE_CACHE,
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete old rendered-markup cache rows.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectMarkupCaches() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.markup-cache');
    if ($ttl <= 0) {
      return 0;
    }

    $cache = new PhabricatorMarkupCache();
    $conn_w = $cache->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d LIMIT 100',
      $cache->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete archived worker tasks (and their attached data rows) older than
   * the configured TTL.
   *
   * @return int Tasks collected in this pass (at most 100).
   */
  private function collectArchivedTasks() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.task-archive');
    if ($ttl <= 0) {
      return 0;
    }

    $task_table = new PhabricatorWorkerArchiveTask();
    $data_table = new PhabricatorWorkerTaskData();
    $conn_w = $task_table->establishConnection('w');

    $rows = queryfx_all(
      $conn_w,
      'SELECT id, dataID FROM %T WHERE dateCreated < %d LIMIT 100',
      $task_table->getTableName(),
      time() - $ttl);
    if (!$rows) {
      return 0;
    }

    $task_ids = ipull($rows, 'id');
    // Some tasks have no data row; filter out the null dataIDs.
    $data_ids = array_filter(ipull($rows, 'dataID'));

    // Delete tasks and their data together so we never orphan data rows.
    $task_table->openTransaction();
    if ($data_ids) {
      queryfx(
        $conn_w,
        'DELETE FROM %T WHERE id IN (%Ld)',
        $data_table->getTableName(),
        $data_ids);
    }
    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE id IN (%Ld)',
      $task_table->getTableName(),
      $task_ids);
    $task_table->saveTransaction();

    return count($task_ids);
  }


  /**
   * Delete key-value cache rows whose explicit expiry time has passed.
   * This collector has no configuration switch; expired rows are always
   * collected.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectGeneralCacheTTL() {
    $cache = new PhabricatorKeyValueDatabaseCache();
    $conn_w = $cache->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE cacheExpires < %d
        ORDER BY cacheExpires ASC LIMIT 100',
      $cache->getTableName(),
      time());

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete key-value cache rows which are simply old, regardless of any
   * explicit expiry, once they exceed the configured maximum lifetime.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectGeneralCaches() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.general-cache');
    if ($ttl <= 0) {
      return 0;
    }

    $cache = new PhabricatorKeyValueDatabaseCache();
    $conn_w = $cache->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE cacheCreated < %d
        ORDER BY cacheCreated ASC LIMIT 100',
      $cache->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete temporary files whose TTL has passed. Files are deleted through
   * the object layer (not raw SQL) so underlying storage is cleaned up too.
   *
   * @return int Files collected in this pass (at most 100).
   */
  private function collectExpiredFiles() {
    $expired = id(new PhabricatorFile())->loadAllWhere(
      'ttl < %d LIMIT 100',
      time());

    foreach ($expired as $file) {
      $file->delete();
    }

    return count($expired);
  }


  /**
   * Delete Conduit method call logs older than the configured TTL.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectExpiredConduitLogs() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.conduit-logs');
    if ($ttl <= 0) {
      return 0;
    }

    $call_log = new PhabricatorConduitMethodCallLog();
    $conn_w = $call_log->establishConnection('w');
    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d
        ORDER BY dateCreated ASC LIMIT 100',
      $call_log->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }


  /**
   * Delete Conduit connection logs older than the configured TTL.
   *
   * NOTE(review): this reads the same 'gcdaemon.ttl.conduit-logs' key as
   * collectExpiredConduitLogs(), so one TTL appears to govern both call and
   * connection logs — confirm this sharing is intentional before adding a
   * separate key.
   *
   * @return int Rows collected in this pass (at most 100).
   */
  private function collectExpiredConduitConnections() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.conduit-logs');
    if ($ttl <= 0) {
      return 0;
    }

    $connection_log = new PhabricatorConduitConnectionLog();
    $conn_w = $connection_log->establishConnection('w');
    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d
        ORDER BY dateCreated ASC LIMIT 100',
      $connection_log->getTableName(),
      time() - $ttl);

    return $conn_w->getAffectedRows();
  }

}
|