2011-07-03 09:47:31 -07:00
|
|
|
<?php
|
|
|
|
|
|
|
|
/**
 * Collects old logs and caches to reduce the amount of data stored in the
 * database.
 *
 * Collection only happens inside a daily window. The window opens at the time
 * named by the 'gcdaemon.run-at' configuration value (parsed with strtotime()
 * in the default timezone) and stays open for 'gcdaemon.run-for', which is
 * added directly to the start timestamp (so presumably a duration in
 * seconds). Each collector is individually controlled by a 'gcdaemon.ttl.*'
 * configuration value; a value of zero or less disables that collector.
 *
 * @group daemon
 */
final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {

  /**
   * Maximum number of rows a single collector removes (or scrubs) per pass.
   *
   * The main loop compares collector results against this same value to
   * decide whether any collector still has a backlog, so the query limits and
   * the threshold can not drift apart.
   */
  const GC_BATCH_SIZE = 100;

  /**
   * Main daemon loop. Never returns normally.
   *
   * On every iteration the configuration is re-read, so changes to the
   * collection window are picked up without restarting the daemon.
   *
   * @return void
   * @throws Exception If 'gcdaemon.run-at' can not be parsed by strtotime().
   */
  public function run() {
    // Keep track of when we start and stop the GC so we can emit useful log
    // messages.
    $just_ran = false;

    do {
      $run_at = PhabricatorEnv::getEnvConfig('gcdaemon.run-at');
      $run_for = PhabricatorEnv::getEnvConfig('gcdaemon.run-for');

      // Just use the default timezone, we don't need to get fancy and try
      // to localize this.
      $start = strtotime($run_at);
      if ($start === false) {
        throw new Exception(
          "Configuration 'gcdaemon.run-at' could not be parsed: '{$run_at}'.");
      }

      if (!$this->isInsideCollectionWindow($start, $run_for, time())) {
        if ($just_ran) {
          $this->log("Stopped garbage collector.");
          $just_ran = false;
        }
        // The configuration says we can't collect garbage right now, so
        // just sleep until we can.
        $this->sleep(300);
        continue;
      }

      if (!$just_ran) {
        $this->log("Started garbage collector.");
        $just_ran = true;
      }

      // Array elements are evaluated in order, so the collectors run in the
      // order they are listed here.
      $collected = array(
        'Herald Transcript'        => $this->collectHeraldTranscripts(),
        'Daemon Log'               => $this->collectDaemonLogs(),
        'Differential Parse Cache' => $this->collectParseCaches(),
        'Markup Cache'             => $this->collectMarkupCaches(),
        'Archived Tasks'           => $this->collectArchivedTasks(),
        'General Cache Entries'    => $this->collectGeneralCaches(),
      );

      // Drop collectors which found nothing so we only log useful lines.
      $collected = array_filter($collected);

      foreach ($collected as $thing => $count) {
        $count = number_format($count);
        $this->log("Garbage collected {$count} '{$thing}' objects.");
      }

      // A collector which removed a full batch probably has more work
      // waiting. Look at the largest single result rather than the sum:
      // several collectors each removing a few rows does not mean any of
      // them has a backlog. max() must not be called on an empty array.
      $largest = $collected ? max($collected) : 0;

      if ($largest < self::GC_BATCH_SIZE) {
        // We didn't max out any of the GCs so we're basically caught up. Ease
        // off the GC loop so we don't keep doing table scans just to delete
        // a handful of rows.
        $this->sleep(300);
      } else {
        $this->stillWorking();
      }
    } while (true);
  }

  /**
   * Decide whether a timestamp falls inside the configured collection window.
   *
   * strtotime() resolves a time of day like "11 PM" to today's occurrence.
   * Shortly after midnight that is the *upcoming* window, so a window which
   * crosses midnight would be cut short if we only looked at it. We also
   * test the window which opened one calendar day earlier.
   *
   * @param int $start   Start of the window as resolved for the current day.
   * @param int $run_for Length of the window, added to the start timestamp.
   * @param int $now     Timestamp to test.
   * @return bool True if collection is currently allowed.
   */
  private function isInsideCollectionWindow($start, $run_for, $now) {
    $window_starts = array(
      $start,
      strtotime('-1 day', $start),
    );

    foreach ($window_starts as $window_start) {
      if ($now >= $window_start && $now <= ($window_start + $run_for)) {
        return true;
      }
    }

    return false;
  }

  /**
   * Blank out the transcript columns of Herald transcripts older than
   * 'gcdaemon.ttl.herald-transcripts' and flag them as garbage collected.
   * Rows are updated in place rather than deleted.
   *
   * @return int Number of rows affected (at most GC_BATCH_SIZE), or 0 when
   *             the collector is disabled.
   */
  private function collectHeraldTranscripts() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.herald-transcripts');
    if ($ttl <= 0) {
      return 0;
    }

    $table = new HeraldTranscript();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'UPDATE %T SET
          objectTranscript     = "",
          ruleTranscripts      = "",
          conditionTranscripts = "",
          applyTranscripts     = "",
          garbageCollected     = 1
        WHERE garbageCollected = 0 AND `time` < %d
        LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::GC_BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Delete daemon log events older than 'gcdaemon.ttl.daemon-logs'.
   *
   * @return int Number of rows deleted (at most GC_BATCH_SIZE), or 0 when
   *             the collector is disabled.
   */
  private function collectDaemonLogs() {
    $ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.daemon-logs');
    if ($ttl <= 0) {
      return 0;
    }

    $table = new PhabricatorDaemonLogEvent();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE epoch < %d LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::GC_BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Delete Differential parse cache rows older than
   * 'gcdaemon.ttl.differential-parse-cache'.
   *
   * The rows live in the table named by DifferentialChangeset::TABLE_CACHE,
   * not in the changeset table itself; the changeset object is only used to
   * obtain a write connection.
   *
   * @return int Number of rows deleted (at most GC_BATCH_SIZE), or 0 when
   *             the collector is disabled.
   */
  private function collectParseCaches() {
    $key = 'gcdaemon.ttl.differential-parse-cache';
    $ttl = PhabricatorEnv::getEnvConfig($key);
    if ($ttl <= 0) {
      return 0;
    }

    $table = new DifferentialChangeset();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d LIMIT %d',
      DifferentialChangeset::TABLE_CACHE,
      time() - $ttl,
      self::GC_BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Delete markup cache rows older than 'gcdaemon.ttl.markup-cache'.
   *
   * @return int Number of rows deleted (at most GC_BATCH_SIZE), or 0 when
   *             the collector is disabled.
   */
  private function collectMarkupCaches() {
    $key = 'gcdaemon.ttl.markup-cache';
    $ttl = PhabricatorEnv::getEnvConfig($key);
    if ($ttl <= 0) {
      return 0;
    }

    $table = new PhabricatorMarkupCache();
    $conn_w = $table->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE dateCreated < %d LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::GC_BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

  /**
   * Delete archived worker tasks older than 'gcdaemon.ttl.task-archive',
   * together with the task data rows they reference.
   *
   * @return int Number of archived tasks selected for deletion (at most
   *             GC_BATCH_SIZE), or 0 when the collector is disabled or
   *             nothing is old enough.
   */
  private function collectArchivedTasks() {
    $key = 'gcdaemon.ttl.task-archive';
    $ttl = PhabricatorEnv::getEnvConfig($key);
    if ($ttl <= 0) {
      return 0;
    }

    $table = new PhabricatorWorkerArchiveTask();
    $data_table = new PhabricatorWorkerTaskData();
    $conn_w = $table->establishConnection('w');

    $rows = queryfx_all(
      $conn_w,
      'SELECT id, dataID FROM %T WHERE dateCreated < %d LIMIT %d',
      $table->getTableName(),
      time() - $ttl,
      self::GC_BATCH_SIZE);

    if (!$rows) {
      return 0;
    }

    // Not every task has a data row; array_filter() drops empty data IDs so
    // we never build an "IN ()" clause from them.
    $data_ids = array_filter(ipull($rows, 'dataID'));
    $task_ids = ipull($rows, 'id');

    // Remove the data and the tasks together so a failure part way through
    // does not leave tasks pointing at deleted data.
    // NOTE(review): if a query throws, the transaction is left open here.
    // Confirm whether the DAO layer needs an explicit rollback call.
    $table->openTransaction();
      if ($data_ids) {
        queryfx(
          $conn_w,
          'DELETE FROM %T WHERE id IN (%Ld)',
          $data_table->getTableName(),
          $data_ids);
      }
      queryfx(
        $conn_w,
        'DELETE FROM %T WHERE id IN (%Ld)',
        $table->getTableName(),
        $task_ids);
    $table->saveTransaction();

    return count($task_ids);
  }

  /**
   * Delete general key-value cache rows older than
   * 'gcdaemon.ttl.general-cache', oldest first.
   *
   * @return int Number of rows deleted (at most GC_BATCH_SIZE), or 0 when
   *             the collector is disabled.
   */
  private function collectGeneralCaches() {
    $key = 'gcdaemon.ttl.general-cache';
    $ttl = PhabricatorEnv::getEnvConfig($key);
    if ($ttl <= 0) {
      return 0;
    }

    $cache = new PhabricatorKeyValueDatabaseCache();
    $conn_w = $cache->establishConnection('w');

    queryfx(
      $conn_w,
      'DELETE FROM %T WHERE cacheCreated < %d
        ORDER BY cacheCreated ASC LIMIT %d',
      $cache->getTableName(),
      time() - $ttl,
      self::GC_BATCH_SIZE);

    return $conn_w->getAffectedRows();
  }

}
|