1
0
Fork 0
mirror of https://we.phorge.it/source/phorge.git synced 2024-09-19 16:58:48 +02:00

Implement a more compact, general database-backed key-value cache

Summary:
See discussion in D4204. Facebook currently has a 314MB remarkup cache with a 55MB index, which is slow to access. Under the theory that this is an index size/quality problem (the current index is on a potentially-384-byte field, with many keys sharing prefixes), provide a more general index with fancy new features:

  - It implements PhutilKeyValueCache, so it can be a component in cache stacks and supports TTL.
  - It has a 12-byte hash-based key.
  - It automatically compresses large blocks of data (most of what we store is highly-compressible HTML).

Test Plan:
  - Basics:
    - Loaded /paste/, saw caches generate and save.
    - Reloaded /paste/, saw the page hit cache.
  - GC:
    - Ran GC daemon, saw nothing.
    - Set maximum lifetime to 1 second, ran GC daemon, saw it collect the entire cache.
  - Deflate:
    - Selected row formats from the database, saw a mixture of 'raw' and 'deflate' storage.
    - Used profiler to verify that 'deflate' is fast (12 calls @ 220us on my paste list).
  - Ran unit tests

Reviewers: vrana, btrahan

Reviewed By: vrana

CC: aran

Differential Revision: https://secure.phabricator.com/D4259
This commit is contained in:
epriestley 2012-12-21 14:17:56 -08:00
parent 62bc3373e5
commit aae5f9efd3
9 changed files with 407 additions and 13 deletions

View file

@ -1130,6 +1130,13 @@ return array(
'remarkup.enable-embedded-youtube' => false,
// -- Cache ----------------------------------------------------------------- //
// Set this to false to disable the use of gzdeflate()-based compression in
// some caches. This may give you less performant (but more debuggable)
// caching.
'cache.enable-deflate' => true,
// -- Garbage Collection ---------------------------------------------------- //
// Phabricator generates various logs and caches in the database which can
@ -1160,6 +1167,7 @@ return array(
'gcdaemon.ttl.differential-parse-cache' => 14 * (24 * 60 * 60),
'gcdaemon.ttl.markup-cache' => 30 * (24 * 60 * 60),
'gcdaemon.ttl.task-archive' => 14 * (24 * 60 * 60),
'gcdaemon.ttl.general-cache' => 30 * (24 * 60 * 60),
// -- Feed ------------------------------------------------------------------ //

View file

@ -0,0 +1,11 @@
CREATE TABLE {$NAMESPACE}_cache.cache_general (
id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
cacheKeyHash CHAR(12) BINARY NOT NULL,
cacheKey VARCHAR(128) NOT NULL COLLATE utf8_bin,
cacheFormat VARCHAR(16) NOT NULL COLLATE utf8_bin,
cacheData LONGBLOB NOT NULL,
cacheCreated INT UNSIGNED NOT NULL,
cacheExpires INT UNSIGNED,
KEY `key_cacheCreated` (cacheCreated),
UNIQUE KEY `key_cacheKeyHash` (cacheKeyHash)
) ENGINE=InnoDB, COLLATE utf8_general_ci;

View file

@ -505,6 +505,7 @@ phutil_register_library_map(array(
'JavelinUIExample' => 'applications/uiexample/examples/JavelinUIExample.php',
'JavelinViewExample' => 'applications/uiexample/examples/JavelinViewExample.php',
'JavelinViewExampleServerView' => 'applications/uiexample/examples/JavelinViewExampleServerView.php',
'LiskChunkTestCase' => 'infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php',
'LiskDAO' => 'infrastructure/storage/lisk/LiskDAO.php',
'LiskDAOSet' => 'infrastructure/storage/lisk/LiskDAOSet.php',
'LiskDAOTestCase' => 'infrastructure/storage/lisk/__tests__/LiskDAOTestCase.php',
@ -835,6 +836,7 @@ phutil_register_library_map(array(
'PhabricatorInlineSummaryView' => 'infrastructure/diff/view/PhabricatorInlineSummaryView.php',
'PhabricatorJavelinLinter' => 'infrastructure/lint/linter/PhabricatorJavelinLinter.php',
'PhabricatorJumpNavHandler' => 'applications/search/engine/PhabricatorJumpNavHandler.php',
'PhabricatorKeyValueDatabaseCache' => 'applications/cache/PhabricatorKeyValueDatabaseCache.php',
'PhabricatorLDAPLoginController' => 'applications/auth/controller/PhabricatorLDAPLoginController.php',
'PhabricatorLDAPProvider' => 'applications/auth/ldap/PhabricatorLDAPProvider.php',
'PhabricatorLDAPRegistrationController' => 'applications/auth/controller/PhabricatorLDAPRegistrationController.php',
@ -1789,6 +1791,7 @@ phutil_register_library_map(array(
'JavelinUIExample' => 'PhabricatorUIExample',
'JavelinViewExample' => 'PhabricatorUIExample',
'JavelinViewExampleServerView' => 'AphrontView',
'LiskChunkTestCase' => 'PhabricatorTestCase',
'LiskDAOTestCase' => 'PhabricatorTestCase',
'LiskEphemeralObjectException' => 'Exception',
'LiskFixtureTestCase' => 'PhabricatorTestCase',
@ -2118,6 +2121,7 @@ phutil_register_library_map(array(
'PhabricatorInlineCommentPreviewController' => 'PhabricatorController',
'PhabricatorInlineSummaryView' => 'AphrontView',
'PhabricatorJavelinLinter' => 'ArcanistLinter',
'PhabricatorKeyValueDatabaseCache' => 'PhutilKeyValueCache',
'PhabricatorLDAPLoginController' => 'PhabricatorAuthController',
'PhabricatorLDAPRegistrationController' => 'PhabricatorAuthController',
'PhabricatorLDAPUnknownUserException' => 'Exception',

View file

@ -0,0 +1,232 @@
<?php
final class PhabricatorKeyValueDatabaseCache
extends PhutilKeyValueCache {
const CACHE_FORMAT_RAW = 'raw';
const CACHE_FORMAT_DEFLATE = 'deflate';
public function setKeys(array $keys, $ttl = null) {
$call_id = null;
if ($this->getProfiler()) {
$call_id = $this->getProfiler()->beginServiceCall(
array(
'type' => 'kvcache-set',
'name' => 'phabricator-db',
'keys' => array_keys($keys),
'ttl' => $ttl,
));
}
if ($keys) {
$map = $this->digestKeys(array_keys($keys));
$conn_w = $this->establishConnection('w');
$sql = array();
foreach ($map as $key => $hash) {
$value = $keys[$key];
list($format, $storage_value) = $this->willWriteValue($key, $value);
$sql[] = qsprintf(
$conn_w,
'(%s, %s, %s, %s, %d, %nd)',
$hash,
$key,
$format,
$storage_value,
time(),
$ttl ? (time() + $ttl) : null);
}
$guard = AphrontWriteGuard::beginScopedUnguardedWrites();
foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) {
queryfx(
$conn_w,
'INSERT INTO %T
(cacheKeyHash, cacheKey, cacheFormat, cacheData,
cacheCreated, cacheExpires) VALUES %Q
ON DUPLICATE KEY UPDATE
cacheKey = VALUES(cacheKey),
cacheFormat = VALUES(cacheFormat),
cacheData = VALUES(cacheData),
cacheCreated = VALUES(cacheCreated),
cacheExpires = VALUES(cacheExpires)',
$this->getTableName(),
$chunk);
}
unset($guard);
}
if ($call_id) {
$this->getProfiler()->endServiceCall($call_id, array());
}
return $this;
}
public function getKeys(array $keys) {
$call_id = null;
if ($this->getProfiler()) {
$call_id = $this->getProfiler()->beginServiceCall(
array(
'type' => 'kvcache-get',
'name' => 'phabricator-db',
'keys' => $keys,
));
}
$results = array();
if ($keys) {
$map = $this->digestKeys($keys);
$rows = queryfx_all(
$this->establishConnection('r'),
'SELECT * FROM %T WHERE cacheKeyHash IN (%Ls)',
$this->getTableName(),
$map);
$rows = ipull($rows, null, 'cacheKey');
foreach ($keys as $key) {
if (empty($rows[$key])) {
continue;
}
$row = $rows[$key];
if ($row['cacheExpires'] && ($row['cacheExpires'] < time())) {
continue;
}
try {
$results[$key] = $this->didReadValue(
$row['cacheFormat'],
$row['cacheData']);
} catch (Exception $ex) {
// Treat this as a cache miss.
phlog($ex);
}
}
}
if ($call_id) {
$this->getProfiler()->endServiceCall(
$call_id,
array(
'hits' => array_keys($results),
));
}
return $results;
}
public function deleteKeys(array $keys) {
$call_id = null;
if ($this->getProfiler()) {
$call_id = $this->getProfiler()->beginServiceCall(
array(
'type' => 'kvcache-del',
'name' => 'phabricator-db',
'keys' => $keys,
));
}
if ($keys) {
$map = $this->digestKeys($keys);
queryfx(
$this->establishConnection('w'),
'DELETE FROM %T WHERE cacheKeyHash IN (%Ls)',
$this->getTableName(),
$keys);
}
if ($call_id) {
$this->getProfiler()->endServiceCall($call_id, array());
}
return $this;
}
public function destroyCache() {
queryfx(
$this->establishConnection('w'),
'DELETE FROM %T',
$this->getTableName());
return $this;
}
/* -( Raw Cache Access )--------------------------------------------------- */
public function establishConnection($mode) {
// TODO: This is the only concrete table we have on the database right
// now.
return id(new PhabricatorMarkupCache())->establishConnection($mode);
}
public function getTableName() {
return 'cache_general';
}
/* -( Implementation )----------------------------------------------------- */
private function digestKeys(array $keys) {
$map = array();
foreach ($keys as $key) {
$map[$key] = PhabricatorHash::digestForIndex($key);
}
return $map;
}
private function willWriteValue($key, $value) {
if (!is_string($value)) {
throw new Exception("Only strings may be written to the DB cache!");
}
static $can_deflate;
if ($can_deflate === null) {
$can_deflate = function_exists('gzdeflate') &&
PhabricatorEnv::getEnvConfig('cache.enable-deflate');
}
// If the value is larger than 1KB, we have gzdeflate(), we successfully
// can deflate it, and it benefits from deflation, store it deflated.
if ($can_deflate) {
$len = strlen($value);
if ($len > 1024) {
$deflated = gzdeflate($value);
if ($deflated !== false) {
$deflated_len = strlen($deflated);
if ($deflated_len < ($len / 2)) {
return array(self::CACHE_FORMAT_DEFLATE, $deflated);
}
}
}
}
return array(self::CACHE_FORMAT_RAW, $value);
}
private function didReadValue($format, $value) {
switch ($format) {
case self::CACHE_FORMAT_RAW:
return $value;
case self::CACHE_FORMAT_DEFLATE:
if (!function_exists('gzinflate')) {
throw new Exception("No gzinflate() to read deflated cache.");
}
$value = gzinflate($value);
if ($value === false) {
throw new Exception("Failed to deflate cache.");
}
return $value;
default:
throw new Exception("Unknown cache format.");
}
}
}

View file

@ -124,23 +124,22 @@ final class PhabricatorPasteQuery
}
private function loadContent(array $pastes) {
$cache = id(new PhabricatorKeyValueDatabaseCache())
->setProfiler(PhutilServiceProfiler::getInstance());
$keys = array();
foreach ($pastes as $paste) {
$keys[] = $this->getContentCacheKey($paste);
}
// TODO: Move to a more appropriate/general cache once we have one? For
// now, this gets automatic GC.
$caches = id(new PhabricatorMarkupCache())->loadAllWhere(
'cacheKey IN (%Ls)',
$keys);
$caches = mpull($caches, null, 'getCacheKey');
$caches = $cache->getKeys($keys);
$need_raw = array();
foreach ($pastes as $paste) {
$key = $this->getContentCacheKey($paste);
if (isset($caches[$key])) {
$paste->attachContent($caches[$key]->getCacheData());
$paste->attachContent($caches[$key]);
} else {
$need_raw[] = $paste;
}
@ -150,18 +149,17 @@ final class PhabricatorPasteQuery
return;
}
$write_data = array();
$this->loadRawContent($need_raw);
foreach ($need_raw as $paste) {
$content = $this->buildContent($paste);
$paste->attachContent($content);
$guard = AphrontWriteGuard::beginScopedUnguardedWrites();
id(new PhabricatorMarkupCache())
->setCacheKey($this->getContentCacheKey($paste))
->setCacheData($content)
->replace();
unset($guard);
$write_data[$this->getContentCacheKey($paste)] = (string)$content;
}
$cache->setKeys($write_data);
}

View file

@ -49,6 +49,7 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {
$n_parse = $this->collectParseCaches();
$n_markup = $this->collectMarkupCaches();
$n_tasks = $this->collectArchivedTasks();
$n_cache = $this->collectGeneralCaches();
$collected = array(
'Herald Transcript' => $n_herald,
@ -56,6 +57,7 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {
'Differential Parse Cache' => $n_parse,
'Markup Cache' => $n_markup,
'Archived Tasks' => $n_tasks,
'General Cache Entries' => $n_cache,
);
$collected = array_filter($collected);
@ -200,4 +202,25 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon {
return count($task_ids);
}
private function collectGeneralCaches() {
$key = 'gcdaemon.ttl.general-cache';
$ttl = PhabricatorEnv::getEnvConfig($key);
if ($ttl <= 0) {
return 0;
}
$cache = new PhabricatorKeyValueDatabaseCache();
$conn_w = $cache->establishConnection('w');
queryfx(
$conn_w,
'DELETE FROM %T WHERE cacheCreated < %d
ORDER BY cacheCreated ASC LIMIT 100',
$cache->getTableName(),
time() - $ttl);
return $conn_w->getAffectedRows();
}
}

View file

@ -148,4 +148,63 @@ abstract class PhabricatorLiskDAO extends LiskDAO {
protected function getConnectionNamespace() {
return self::getStorageNamespace().'_'.$this->getApplicationName();
}
/**
* Break a list of escaped SQL statement fragments (e.g., VALUES lists for
* INSERT, previously built with @{function:qsprintf}) into chunks which will
* fit under the MySQL 'max_allowed_packet' limit.
*
* Chunks are glued together with `$glue`, by default ", ".
*
* If a statement is too large to fit within the limit, it is broken into
* its own chunk (but might fail when the query executes).
*/
public static function chunkSQL(
array $fragments,
$glue = ', ',
$limit = null) {
if ($limit === null) {
// NOTE: Hard-code this at 1MB for now, minus a 10% safety buffer.
// Eventually we could query MySQL or let the user configure it.
$limit = (int)((1024 * 1024) * 0.90);
}
$result = array();
$chunk = array();
$len = 0;
$glue_len = strlen($glue);
foreach ($fragments as $fragment) {
$this_len = strlen($fragment);
if ($chunk) {
// Chunks after the first also imply glue.
$this_len += $glue_len;
}
if ($len + $this_len <= $limit) {
$len += $this_len;
$chunk[] = $fragment;
} else {
if ($chunk) {
$result[] = $chunk;
}
$len = strlen($fragment);
$chunk = array($fragment);
}
}
if ($chunk) {
$result[] = $chunk;
}
foreach ($result as $key => $fragment_list) {
$result[$key] = implode($glue, $fragment_list);
}
return $result;
}
}

View file

@ -0,0 +1,55 @@
<?php
final class LiskChunkTestCase extends PhabricatorTestCase {
public function testSQLChunking() {
$fragments = array(
'a', 'a',
'b', 'b',
'ccc',
'dd',
'e',
);
$this->assertEqual(
array(
'aa',
'bb',
'ccc',
'dd',
'e',
),
PhabricatorLiskDAO::chunkSQL($fragments, '', 2));
$fragments = array(
'a', 'a', 'a', 'XX', 'a', 'a', 'a', 'a'
);
$this->assertEqual(
array(
'a, a, a',
'XX, a, a',
'a, a',
),
PhabricatorLiskDAO::chunkSQL($fragments, ', ', 8));
$fragments = array(
'xxxxxxxxxx',
'yyyyyyyyyy',
'a', 'b', 'c',
'zzzzzzzzzz',
);
$this->assertEqual(
array(
'xxxxxxxxxx',
'yyyyyyyyyy',
'a, b, c',
'zzzzzzzzzz',
),
PhabricatorLiskDAO::chunkSQL($fragments, ', ', 8));
}
}

View file

@ -1060,6 +1060,10 @@ final class PhabricatorBuiltinPatchList extends PhabricatorSQLPatchList {
'type' => 'sql',
'name' => $this->getPatchPath('20121209.xmacromigratekey.sql'),
),
'20121220.generalcache.sql' => array(
'type' => 'sql',
'name' => $this->getPatchPath('20121220.generalcache.sql'),
),
);
}