mirror of
https://we.phorge.it/source/phorge.git
synced 2024-11-09 16:32:39 +01:00
Add a garbage collector daemon
Summary: Phabricator generates a bunch of data that we don't need to keep around forever, add a GC daemon to get rid of it with some basic configuration options. This needs a couple more diffs to get some of the details but I think this is a reasonable start. I also fixed a couple of UI things related to this, e.g. the daemon logs page going crazy when a daemon gets stuck in a loop and dumps tons of data to stdout. Test Plan: - Ran gc daemon in 'phd debug' mode and saw it delete stuff, then sleep once it had cleaned everything up. - Mucked around with TTLs and verified they work correctly. - Viewed gc'd transcripts in the web interface and made sure they displayed okay. - Viewed daemon logs before/after garbage collection. - Running some run-at / run-for tests now, I'll update if the daemon doesn't shut off in ~10-15 minutes. :P Reviewed By: tuomaspelkonen Reviewers: jungejason, tuomaspelkonen, aran CC: aran, tuomaspelkonen, epriestley Differential Revision: 583
This commit is contained in:
parent
51de554238
commit
4ef918e213
10 changed files with 259 additions and 51 deletions
|
@ -375,6 +375,37 @@ return array(
|
|||
// silly (but sort of awesome).
|
||||
'remarkup.enable-embedded-youtube' => false,
|
||||
|
||||
|
||||
// -- Garbage Collection ---------------------------------------------------- //
|
||||
|
||||
// Phabricator generates various logs and caches in the database which can
|
||||
// be garbage collected after a while to make the total data size more
|
||||
// manageable. To run garbage collection, launch a
|
||||
// PhabricatorGarbageCollector daemon.
|
||||
|
||||
// Since the GC daemon can issue large writes and table scans, you may want to
|
||||
// run it only during off hours or make sure it is scheduled so it doesn't
|
||||
// overlap with backups. This determines when the daemon can start running
|
||||
// each day.
|
||||
'gcdaemon.run-at' => '12 AM',
|
||||
|
||||
// How many seconds after 'gcdaemon.run-at' the daemon may collect garbage
|
||||
// for. By default it runs continuously, but you can set it to run for a
|
||||
// limited period of time. For instance, if you do backups at 3 AM, you might
|
||||
// run garbage collection for an hour beforehand. This is not a high-precision
|
||||
// limit so you may want to leave some room for the GC to actually stop, and
|
||||
// if you set it to something like 3 seconds you're on your own.
|
||||
'gcdaemon.run-for' => 24 * 60 * 60,
|
||||
|
||||
// These 'ttl' keys configure how much old data the GC daemon keeps around.
|
||||
// Objects older than the ttl will be collected. Set any value to 0 to store
|
||||
// data indefinitely.
|
||||
|
||||
'gcdaemon.ttl.herald-transcripts' => 30 * (24 * 60 * 60),
|
||||
'gcdaemon.ttl.daemon-logs' => 7 * (24 * 60 * 60),
|
||||
'gcdaemon.ttl.differential-render-cache' => 7 * (24 * 60 * 60),
|
||||
|
||||
|
||||
// -- Customization --------------------------------------------------------- //
|
||||
|
||||
// Paths to additional phutil libraries to load.
|
||||
|
|
|
@ -58,5 +58,6 @@ sudo /etc/init.d/httpd start
|
|||
|
||||
# $ROOT/phabricator/bin/phd repository-launch-master
|
||||
# $ROOT/phabricator/bin/phd launch metamta
|
||||
# $ROOT/phabricator/bin/phd launch garbagecollector
|
||||
# $ROOT/phabricator/bin/phd launch 4 taskmaster
|
||||
# $ROOT/phabricator/bin/phd launch ircbot /config/bot.json
|
||||
|
|
|
@ -352,6 +352,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorFileURI' => 'applications/files/uri',
|
||||
'PhabricatorFileUploadController' => 'applications/files/controller/upload',
|
||||
'PhabricatorFileViewController' => 'applications/files/controller/view',
|
||||
'PhabricatorGarbageCollectorDaemon' => 'infrastructure/daemon/garbagecollector',
|
||||
'PhabricatorGoodForNothingWorker' => 'infrastructure/daemon/workers/worker/goodfornothing',
|
||||
'PhabricatorHandleObjectSelectorDataView' => 'applications/phid/handle/view/selector',
|
||||
'PhabricatorHelpController' => 'applications/help/controller/base',
|
||||
|
@ -844,6 +845,7 @@ phutil_register_library_map(array(
|
|||
'PhabricatorFileTransformController' => 'PhabricatorFileController',
|
||||
'PhabricatorFileUploadController' => 'PhabricatorFileController',
|
||||
'PhabricatorFileViewController' => 'PhabricatorFileController',
|
||||
'PhabricatorGarbageCollectorDaemon' => 'PhabricatorDaemon',
|
||||
'PhabricatorGoodForNothingWorker' => 'PhabricatorWorker',
|
||||
'PhabricatorHelpController' => 'PhabricatorController',
|
||||
'PhabricatorHelpKeyboardShortcutController' => 'PhabricatorHelpController',
|
||||
|
|
|
@ -43,11 +43,46 @@ final class PhabricatorDaemonLogEventsView extends AphrontView {
|
|||
}
|
||||
|
||||
foreach ($this->events as $event) {
|
||||
|
||||
// Limit display log size. If a daemon gets stuck in an output loop this
|
||||
// page can be like >100MB if we don't truncate stuff. Try to do cheap
|
||||
// line-based truncation first, and fall back to expensive UTF-8 character
|
||||
// truncation if that doesn't get things short enough.
|
||||
|
||||
$message = $event->getMessage();
|
||||
|
||||
$more_lines = null;
|
||||
$more_chars = null;
|
||||
$line_limit = 12;
|
||||
if (substr_count($message, "\n") > $line_limit) {
|
||||
$message = explode("\n", $message);
|
||||
$more_lines = count($message) - $line_limit;
|
||||
$message = array_slice($message, 0, $line_limit);
|
||||
$message = implode("\n", $message);
|
||||
}
|
||||
|
||||
$char_limit = 8192;
|
||||
if (strlen($message) > $char_limit) {
|
||||
$message = phutil_utf8v($message);
|
||||
$more_chars = count($message) - $char_limit;
|
||||
$message = array_slice($message, 0, $char_limit);
|
||||
$message = implode('', $message);
|
||||
}
|
||||
|
||||
$more = null;
|
||||
if ($more_chars) {
|
||||
$more = number_format($more_chars);
|
||||
$more = "\n<... {$more} more characters ...>";
|
||||
} else if ($more_lines) {
|
||||
$more = number_format($more_lines);
|
||||
$more = "\n<... {$more} more lines ...>";
|
||||
}
|
||||
|
||||
$row = array(
|
||||
phutil_escape_html($event->getLogType()),
|
||||
phabricator_date($event->getEpoch(), $this->user),
|
||||
phabricator_time($event->getEpoch(), $this->user),
|
||||
str_replace("\n", '<br />', phutil_escape_html($event->getMessage())),
|
||||
str_replace("\n", '<br />', phutil_escape_html($message.$more)),
|
||||
);
|
||||
|
||||
if ($this->combinedLog) {
|
||||
|
|
|
@ -11,6 +11,7 @@ phutil_require_module('phabricator', 'view/control/table');
|
|||
phutil_require_module('phabricator', 'view/utils');
|
||||
|
||||
phutil_require_module('phutil', 'markup');
|
||||
phutil_require_module('phutil', 'utils');
|
||||
|
||||
|
||||
phutil_require_source('PhabricatorDaemonLogEventsView.php');
|
||||
|
|
|
@ -42,40 +42,45 @@ class HeraldTranscriptController extends HeraldController {
|
|||
throw new Exception('Uknown transcript!');
|
||||
}
|
||||
|
||||
$field_names = HeraldFieldConfig::getFieldMap();
|
||||
$condition_names = HeraldConditionConfig::getConditionMap();
|
||||
$action_names = HeraldActionConfig::getActionMap();
|
||||
|
||||
require_celerity_resource('herald-test-css');
|
||||
|
||||
$filter = $this->getFilterPHIDs();
|
||||
$this->filterTranscript($xscript, $filter);
|
||||
$phids = array_merge($filter, $this->getTranscriptPHIDs($xscript));
|
||||
$phids = array_unique($phids);
|
||||
$phids = array_filter($phids);
|
||||
|
||||
$handles = id(new PhabricatorObjectHandleData($phids))
|
||||
->loadHandles();
|
||||
$this->handles = $handles;
|
||||
|
||||
$object_xscript = $xscript->getObjectTranscript();
|
||||
|
||||
$nav = $this->buildSideNav();
|
||||
|
||||
$apply_xscript_panel = $this->buildApplyTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($apply_xscript_panel);
|
||||
$object_xscript = $xscript->getObjectTranscript();
|
||||
if (!$object_xscript) {
|
||||
$notice = id(new AphrontErrorView())
|
||||
->setSeverity(AphrontErrorView::SEVERITY_NOTICE)
|
||||
->setTitle('Old Transcript')
|
||||
->appendChild(
|
||||
'<p>Details of this transcript have been garbage collected.</p>');
|
||||
$nav->appendChild($notice);
|
||||
} else {
|
||||
$filter = $this->getFilterPHIDs();
|
||||
$this->filterTranscript($xscript, $filter);
|
||||
$phids = array_merge($filter, $this->getTranscriptPHIDs($xscript));
|
||||
$phids = array_unique($phids);
|
||||
$phids = array_filter($phids);
|
||||
|
||||
$action_xscript_panel = $this->buildActionTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($action_xscript_panel);
|
||||
$handles = id(new PhabricatorObjectHandleData($phids))
|
||||
->loadHandles();
|
||||
$this->handles = $handles;
|
||||
|
||||
$object_xscript_panel = $this->buildObjectTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($object_xscript_panel);
|
||||
$apply_xscript_panel = $this->buildApplyTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($apply_xscript_panel);
|
||||
|
||||
$action_xscript_panel = $this->buildActionTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($action_xscript_panel);
|
||||
|
||||
$object_xscript_panel = $this->buildObjectTranscriptPanel(
|
||||
$xscript);
|
||||
$nav->appendChild($object_xscript_panel);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
TODO
|
||||
|
||||
$notice = null;
|
||||
if ($xscript->getDryRun()) {
|
||||
|
@ -84,30 +89,6 @@ class HeraldTranscriptController extends HeraldController {
|
|||
This was a dry run to test Herald rules, no actions were executed.
|
||||
</tools:notice>;
|
||||
}
|
||||
|
||||
if (!$object_xscript) {
|
||||
$notice =
|
||||
<x:frag>
|
||||
<tools:notice title="Old Transcript">
|
||||
Details of this transcript have been discarded. Full transcripts
|
||||
are retained for 30 days.
|
||||
</tools:notice>
|
||||
{$notice}
|
||||
</x:frag>;
|
||||
}
|
||||
|
||||
|
||||
return
|
||||
<herald:standard-page title="Transcript">
|
||||
<div style="padding: 1em;">
|
||||
<tools:side-nav items={$this->renderNavItems()}>
|
||||
{$notice}
|
||||
{$apply_xscript_markup}
|
||||
{$rule_table}
|
||||
{$object_xscript_table}
|
||||
</tools:side-nav>
|
||||
</div>
|
||||
</herald:standard-page>;
|
||||
*/
|
||||
|
||||
return $this->buildStandardPageResponse(
|
||||
|
@ -264,7 +245,7 @@ class HeraldTranscriptController extends HeraldController {
|
|||
foreach ($xscript->getApplyTranscripts() as $id => $apply_xscript) {
|
||||
$rule_id = $apply_xscript->getRuleID();
|
||||
if ($filter_owned) {
|
||||
if (!$rule_xscripts[$rule_id]) {
|
||||
if (empty($rule_xscripts[$rule_id])) {
|
||||
// No associated rule so you can't own this effect.
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@ phutil_require_module('phabricator', 'applications/herald/storage/transcript/bas
|
|||
phutil_require_module('phabricator', 'applications/phid/handle/data');
|
||||
phutil_require_module('phabricator', 'infrastructure/celerity/api');
|
||||
phutil_require_module('phabricator', 'view/control/table');
|
||||
phutil_require_module('phabricator', 'view/form/error');
|
||||
phutil_require_module('phabricator', 'view/layout/panel');
|
||||
phutil_require_module('phabricator', 'view/layout/sidenav');
|
||||
|
||||
|
|
|
@ -70,3 +70,4 @@ You can get a list of launchable daemons with **phd list**:
|
|||
- **PhabricatorTaskmasterDaemon** runs a generic task queue; and
|
||||
- **PhabricatorRepository** daemons track repositories, descriptions are
|
||||
available in the @{article:Diffusion User Guide}.
|
||||
- **PhabricatorGarbageCollectorDaemon** cleans up old logs and caches.
|
||||
|
|
|
@ -0,0 +1,139 @@
|
|||
<?php
|
||||
|
||||
/*
|
||||
* Copyright 2011 Facebook, Inc.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Collects old logs and caches to reduce the amount of data stored in the
|
||||
* database.
|
||||
*
|
||||
* @group daemon
|
||||
*/
|
||||
class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {
|
||||
|
||||
public function run() {
|
||||
|
||||
// Keep track of when we start and stop the GC so we can emit useful log
|
||||
// messages.
|
||||
$just_ran = false;
|
||||
|
||||
do {
|
||||
$run_at = PhabricatorEnv::getEnvConfig('gcdaemon.run-at');
|
||||
$run_for = PhabricatorEnv::getEnvConfig('gcdaemon.run-for');
|
||||
|
||||
// Just use the default timezone, we don't need to get fancy and try
|
||||
// to localize this.
|
||||
$start = strtotime($run_at);
|
||||
if ($start === false) {
|
||||
throw new Exception(
|
||||
"Configuration 'gcdaemon.run-at' could not be parsed: '{$run_at}'.");
|
||||
}
|
||||
|
||||
$now = time();
|
||||
|
||||
if ($now < $start || $now > ($start + $run_for)) {
|
||||
if ($just_ran) {
|
||||
echo "Stopped garbage collector.\n";
|
||||
$just_ran = false;
|
||||
}
|
||||
// The configuration says we can't collect garbage right now, so
|
||||
// just sleep until we can.
|
||||
$this->sleep(300);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!$just_ran) {
|
||||
echo "Started garbage collector.\n";
|
||||
$just_ran = true;
|
||||
}
|
||||
|
||||
$n_herald = $this->collectHeraldTranscripts();
|
||||
$n_daemon = $this->collectDaemonLogs();
|
||||
$n_render = $this->collectRenderCaches();
|
||||
|
||||
$collected = array(
|
||||
'Herald Transcript' => $n_herald,
|
||||
'Daemon Log' => $n_daemon,
|
||||
'Render Cache' => $n_render,
|
||||
);
|
||||
$collected = array_filter($collected);
|
||||
|
||||
foreach ($collected as $thing => $count) {
|
||||
$count = number_format($count);
|
||||
echo "Garbage collected {$count} '{$thing}' objects.\n";
|
||||
}
|
||||
|
||||
$total = array_sum($collected);
|
||||
if ($total < 100) {
|
||||
// We didn't max out any of the GCs so we're basically caught up. Ease
|
||||
// off the GC loop so we don't keep doing table scans just to delete
|
||||
// a handful of rows.
|
||||
$this->sleep(300);
|
||||
} else {
|
||||
$this->stillWorking();
|
||||
}
|
||||
} while (true);
|
||||
|
||||
}
|
||||
|
||||
private function collectHeraldTranscripts() {
|
||||
$ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.herald-transcripts');
|
||||
if ($ttl <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
$table = new HeraldTranscript();
|
||||
$conn_w = $table->establishConnection('w');
|
||||
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'UPDATE %T SET
|
||||
objectTranscript = "",
|
||||
ruleTranscripts = "",
|
||||
conditionTranscripts = "",
|
||||
applyTranscripts = ""
|
||||
WHERE `time` < %d AND objectTranscript != ""
|
||||
LIMIT 100',
|
||||
$table->getTableName(),
|
||||
time() - $ttl);
|
||||
|
||||
return $conn_w->getAffectedRows();
|
||||
}
|
||||
|
||||
private function collectDaemonLogs() {
|
||||
$ttl = PhabricatorEnv::getEnvConfig('gcdaemon.ttl.daemon-logs');
|
||||
if ($ttl <= 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
$table = new PhabricatorDaemonLogEvent();
|
||||
$conn_w = $table->establishConnection('w');
|
||||
|
||||
queryfx(
|
||||
$conn_w,
|
||||
'DELETE FROM %T WHERE epoch < %d LIMIT 100',
|
||||
$table->getTableName(),
|
||||
time() - $ttl);
|
||||
|
||||
return $conn_w->getAffectedRows();
|
||||
}
|
||||
|
||||
private function collectRenderCaches() {
|
||||
// TODO: Implement this, no epoch column on the table right now.
|
||||
return 0;
|
||||
}
|
||||
|
||||
}
|
16
src/infrastructure/daemon/garbagecollector/__init__.php
Normal file
16
src/infrastructure/daemon/garbagecollector/__init__.php
Normal file
|
@ -0,0 +1,16 @@
|
|||
<?php
|
||||
/**
|
||||
* This file is automatically generated. Lint this module to rebuild it.
|
||||
* @generated
|
||||
*/
|
||||
|
||||
|
||||
|
||||
phutil_require_module('phabricator', 'applications/herald/storage/transcript/base');
|
||||
phutil_require_module('phabricator', 'infrastructure/daemon/base');
|
||||
phutil_require_module('phabricator', 'infrastructure/daemon/storage/event');
|
||||
phutil_require_module('phabricator', 'infrastructure/env');
|
||||
phutil_require_module('phabricator', 'storage/queryfx');
|
||||
|
||||
|
||||
phutil_require_source('PhabricatorGarbageCollectorDaemon.php');
|
Loading…
Reference in a new issue