From aae5f9efd3d3b5fa238acc74d6b58727eb5c8fb4 Mon Sep 17 00:00:00 2001 From: epriestley Date: Fri, 21 Dec 2012 14:17:56 -0800 Subject: [PATCH] Implement a more compact, general database-backed key-value cache Summary: See discussion in D4204. Facebook currently has a 314MB remarkup cache with a 55MB index, which is slow to access. Under the theory that this is an index size/quality problem (the current index is on a potentially-384-byte field, with many keys sharing prefixes), provide a more general index with fancy new features: - It implements PhutilKeyValueCache, so it can be a component in cache stacks and supports TTL. - It has a 12-byte hash-based key. - It automatically compresses large blocks of data (most of what we store is highly-compressible HTML). Test Plan: - Basics: - Loaded /paste/, saw caches generate and save. - Reloaded /paste/, saw the page hit cache. - GC: - Ran GC daemon, saw nothing. - Set maximum lifetime to 1 second, ran GC daemon, saw it collect the entire cache. - Deflate: - Selected row formats from the database, saw a mixture of 'raw' and 'deflate' storage. - Used profiler to verify that 'deflate' is fast (12 calls @ 220us on my paste list). - Ran unit tests Reviewers: vrana, btrahan Reviewed By: vrana CC: aran Differential Revision: https://secure.phabricator.com/D4259 --- conf/default.conf.php | 8 + .../sql/patches/20121220.generalcache.sql | 11 + src/__phutil_library_map__.php | 4 + .../PhabricatorKeyValueDatabaseCache.php | 232 ++++++++++++++++++ .../paste/query/PhabricatorPasteQuery.php | 24 +- .../PhabricatorGarbageCollectorDaemon.php | 23 ++ .../storage/lisk/PhabricatorLiskDAO.php | 59 +++++ .../lisk/__tests__/LiskChunkTestCase.php | 55 +++++ .../patch/PhabricatorBuiltinPatchList.php | 4 + 9 files changed, 407 insertions(+), 13 deletions(-) create mode 100644 resources/sql/patches/20121220.generalcache.sql create mode 100644 src/applications/cache/PhabricatorKeyValueDatabaseCache.php create mode 100644 src/infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php diff --git a/conf/default.conf.php b/conf/default.conf.php index 077dd9677e..0e3a281df5 100644 --- a/conf/default.conf.php +++ b/conf/default.conf.php @@ -1130,6 +1130,13 @@ return array( 'remarkup.enable-embedded-youtube' => false, +// -- Cache ----------------------------------------------------------------- // + + // Set this to false to disable the use of gzdeflate()-based compression in + // some caches. This may give you less performant (but more debuggable) + // caching. + 'cache.enable-deflate' => true, + // -- Garbage Collection ---------------------------------------------------- // // Phabricator generates various logs and caches in the database which can @@ -1160,6 +1167,7 @@ return array( 'gcdaemon.ttl.differential-parse-cache' => 14 * (24 * 60 * 60), 'gcdaemon.ttl.markup-cache' => 30 * (24 * 60 * 60), 'gcdaemon.ttl.task-archive' => 14 * (24 * 60 * 60), + 'gcdaemon.ttl.general-cache' => 30 * (24 * 60 * 60), // -- Feed ------------------------------------------------------------------ // diff --git a/resources/sql/patches/20121220.generalcache.sql b/resources/sql/patches/20121220.generalcache.sql new file mode 100644 index 0000000000..acfa1cf57a --- /dev/null +++ b/resources/sql/patches/20121220.generalcache.sql @@ -0,0 +1,11 @@ +CREATE TABLE {$NAMESPACE}_cache.cache_general ( + id BIGINT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, + cacheKeyHash CHAR(12) BINARY NOT NULL, + cacheKey VARCHAR(128) NOT NULL COLLATE utf8_bin, + cacheFormat VARCHAR(16) NOT NULL COLLATE utf8_bin, + cacheData LONGBLOB NOT NULL, + cacheCreated INT UNSIGNED NOT NULL, + cacheExpires INT UNSIGNED, + KEY `key_cacheCreated` (cacheCreated), + UNIQUE KEY `key_cacheKeyHash` (cacheKeyHash) +) ENGINE=InnoDB, COLLATE utf8_general_ci; diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index dabebdd56f..3d18f15d0f 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -505,6 +505,7 @@ phutil_register_library_map(array( 'JavelinUIExample' => 'applications/uiexample/examples/JavelinUIExample.php', 'JavelinViewExample' => 'applications/uiexample/examples/JavelinViewExample.php', 'JavelinViewExampleServerView' => 'applications/uiexample/examples/JavelinViewExampleServerView.php', + 'LiskChunkTestCase' => 'infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php', 'LiskDAO' => 'infrastructure/storage/lisk/LiskDAO.php', 'LiskDAOSet' => 'infrastructure/storage/lisk/LiskDAOSet.php', 'LiskDAOTestCase' => 'infrastructure/storage/lisk/__tests__/LiskDAOTestCase.php', @@ -835,6 +836,7 @@ phutil_register_library_map(array( 'PhabricatorInlineSummaryView' => 'infrastructure/diff/view/PhabricatorInlineSummaryView.php', 'PhabricatorJavelinLinter' => 'infrastructure/lint/linter/PhabricatorJavelinLinter.php', 'PhabricatorJumpNavHandler' => 'applications/search/engine/PhabricatorJumpNavHandler.php', + 'PhabricatorKeyValueDatabaseCache' => 'applications/cache/PhabricatorKeyValueDatabaseCache.php', 'PhabricatorLDAPLoginController' => 'applications/auth/controller/PhabricatorLDAPLoginController.php', 'PhabricatorLDAPProvider' => 'applications/auth/ldap/PhabricatorLDAPProvider.php', 'PhabricatorLDAPRegistrationController' => 'applications/auth/controller/PhabricatorLDAPRegistrationController.php', @@ -1789,6 +1791,7 @@ phutil_register_library_map(array( 'JavelinUIExample' => 'PhabricatorUIExample', 'JavelinViewExample' => 'PhabricatorUIExample', 'JavelinViewExampleServerView' => 'AphrontView', + 'LiskChunkTestCase' => 'PhabricatorTestCase', 'LiskDAOTestCase' => 'PhabricatorTestCase', 'LiskEphemeralObjectException' => 'Exception', 'LiskFixtureTestCase' => 'PhabricatorTestCase', @@ -2118,6 +2121,7 @@ phutil_register_library_map(array( 'PhabricatorInlineCommentPreviewController' => 'PhabricatorController', 'PhabricatorInlineSummaryView' => 'AphrontView', 'PhabricatorJavelinLinter' => 'ArcanistLinter', + 'PhabricatorKeyValueDatabaseCache' => 'PhutilKeyValueCache', 'PhabricatorLDAPLoginController' => 'PhabricatorAuthController', 'PhabricatorLDAPRegistrationController' => 'PhabricatorAuthController', 'PhabricatorLDAPUnknownUserException' => 'Exception', diff --git a/src/applications/cache/PhabricatorKeyValueDatabaseCache.php b/src/applications/cache/PhabricatorKeyValueDatabaseCache.php new file mode 100644 index 0000000000..314a86a70a --- /dev/null +++ b/src/applications/cache/PhabricatorKeyValueDatabaseCache.php @@ -0,0 +1,232 @@ +getProfiler()) { + $call_id = $this->getProfiler()->beginServiceCall( + array( + 'type' => 'kvcache-set', + 'name' => 'phabricator-db', + 'keys' => array_keys($keys), + 'ttl' => $ttl, + )); + } + + if ($keys) { + $map = $this->digestKeys(array_keys($keys)); + $conn_w = $this->establishConnection('w'); + + $sql = array(); + foreach ($map as $key => $hash) { + $value = $keys[$key]; + + list($format, $storage_value) = $this->willWriteValue($key, $value); + + $sql[] = qsprintf( + $conn_w, + '(%s, %s, %s, %s, %d, %nd)', + $hash, + $key, + $format, + $storage_value, + time(), + $ttl ? (time() + $ttl) : null); + } + + $guard = AphrontWriteGuard::beginScopedUnguardedWrites(); + foreach (PhabricatorLiskDAO::chunkSQL($sql) as $chunk) { + queryfx( + $conn_w, + 'INSERT INTO %T + (cacheKeyHash, cacheKey, cacheFormat, cacheData, + cacheCreated, cacheExpires) VALUES %Q + ON DUPLICATE KEY UPDATE + cacheKey = VALUES(cacheKey), + cacheFormat = VALUES(cacheFormat), + cacheData = VALUES(cacheData), + cacheCreated = VALUES(cacheCreated), + cacheExpires = VALUES(cacheExpires)', + $this->getTableName(), + $chunk); + } + unset($guard); + } + + if ($call_id) { + $this->getProfiler()->endServiceCall($call_id, array()); + } + + return $this; + } + + public function getKeys(array $keys) { + $call_id = null; + if ($this->getProfiler()) { + $call_id = $this->getProfiler()->beginServiceCall( + array( + 'type' => 'kvcache-get', + 'name' => 'phabricator-db', + 'keys' => $keys, + )); + } + + $results = array(); + if ($keys) { + $map = $this->digestKeys($keys); + + $rows = queryfx_all( + $this->establishConnection('r'), + 'SELECT * FROM %T WHERE cacheKeyHash IN (%Ls)', + $this->getTableName(), + $map); + $rows = ipull($rows, null, 'cacheKey'); + + foreach ($keys as $key) { + if (empty($rows[$key])) { + continue; + } + + $row = $rows[$key]; + + if ($row['cacheExpires'] && ($row['cacheExpires'] < time())) { + continue; + } + + try { + $results[$key] = $this->didReadValue( + $row['cacheFormat'], + $row['cacheData']); + } catch (Exception $ex) { + // Treat this as a cache miss. + phlog($ex); + } + } + } + + if ($call_id) { + $this->getProfiler()->endServiceCall( + $call_id, + array( + 'hits' => array_keys($results), + )); + } + + return $results; + } + + public function deleteKeys(array $keys) { + $call_id = null; + if ($this->getProfiler()) { + $call_id = $this->getProfiler()->beginServiceCall( + array( + 'type' => 'kvcache-del', + 'name' => 'phabricator-db', + 'keys' => $keys, + )); + } + + if ($keys) { + $map = $this->digestKeys($keys); + queryfx( + $this->establishConnection('w'), + 'DELETE FROM %T WHERE cacheKeyHash IN (%Ls)', + $this->getTableName(), + $keys); + } + + if ($call_id) { + $this->getProfiler()->endServiceCall($call_id, array()); + } + + return $this; + } + + public function destroyCache() { + queryfx( + $this->establishConnection('w'), + 'DELETE FROM %T', + $this->getTableName()); + return $this; + } + + +/* -( Raw Cache Access )--------------------------------------------------- */ + + + public function establishConnection($mode) { + // TODO: This is the only concrete table we have on the database right + // now. + return id(new PhabricatorMarkupCache())->establishConnection($mode); + } + + public function getTableName() { + return 'cache_general'; + } + + +/* -( Implementation )----------------------------------------------------- */ + + + private function digestKeys(array $keys) { + $map = array(); + foreach ($keys as $key) { + $map[$key] = PhabricatorHash::digestForIndex($key); + } + return $map; + } + + private function willWriteValue($key, $value) { + if (!is_string($value)) { + throw new Exception("Only strings may be written to the DB cache!"); + } + + static $can_deflate; + if ($can_deflate === null) { + $can_deflate = function_exists('gzdeflate') && + PhabricatorEnv::getEnvConfig('cache.enable-deflate'); + } + + // If the value is larger than 1KB, we have gzdeflate(), we successfully + // can deflate it, and it benefits from deflation, store it deflated. + if ($can_deflate) { + $len = strlen($value); + if ($len > 1024) { + $deflated = gzdeflate($value); + if ($deflated !== false) { + $deflated_len = strlen($deflated); + if ($deflated_len < ($len / 2)) { + return array(self::CACHE_FORMAT_DEFLATE, $deflated); + } + } + } + } + + return array(self::CACHE_FORMAT_RAW, $value); + } + + private function didReadValue($format, $value) { + switch ($format) { + case self::CACHE_FORMAT_RAW: + return $value; + case self::CACHE_FORMAT_DEFLATE: + if (!function_exists('gzinflate')) { + throw new Exception("No gzinflate() to read deflated cache."); + } + $value = gzinflate($value); + if ($value === false) { + throw new Exception("Failed to deflate cache."); + } + return $value; + default: + throw new Exception("Unknown cache format."); + } + } + + +} diff --git a/src/applications/paste/query/PhabricatorPasteQuery.php b/src/applications/paste/query/PhabricatorPasteQuery.php index a8efafa915..e3ba1fb758 100644 --- a/src/applications/paste/query/PhabricatorPasteQuery.php +++ b/src/applications/paste/query/PhabricatorPasteQuery.php @@ -124,23 +124,22 @@ final class PhabricatorPasteQuery } private function loadContent(array $pastes) { + $cache = id(new PhabricatorKeyValueDatabaseCache()) + ->setProfiler(PhutilServiceProfiler::getInstance()); + $keys = array(); foreach ($pastes as $paste) { $keys[] = $this->getContentCacheKey($paste); } - // TODO: Move to a more appropriate/general cache once we have one? For - // now, this gets automatic GC. - $caches = id(new PhabricatorMarkupCache())->loadAllWhere( - 'cacheKey IN (%Ls)', - $keys); - $caches = mpull($caches, null, 'getCacheKey'); + + $caches = $cache->getKeys($keys); $need_raw = array(); foreach ($pastes as $paste) { $key = $this->getContentCacheKey($paste); if (isset($caches[$key])) { - $paste->attachContent($caches[$key]->getCacheData()); + $paste->attachContent($caches[$key]); } else { $need_raw[] = $paste; } @@ -150,18 +149,17 @@ final class PhabricatorPasteQuery return; } + $write_data = array(); + $this->loadRawContent($need_raw); foreach ($need_raw as $paste) { $content = $this->buildContent($paste); $paste->attachContent($content); - $guard = AphrontWriteGuard::beginScopedUnguardedWrites(); - id(new PhabricatorMarkupCache()) - ->setCacheKey($this->getContentCacheKey($paste)) - ->setCacheData($content) - ->replace(); - unset($guard); + $write_data[$this->getContentCacheKey($paste)] = (string)$content; } + + $cache->setKeys($write_data); } diff --git a/src/infrastructure/daemon/PhabricatorGarbageCollectorDaemon.php b/src/infrastructure/daemon/PhabricatorGarbageCollectorDaemon.php index bc909bc543..2dbba7165d 100644 --- a/src/infrastructure/daemon/PhabricatorGarbageCollectorDaemon.php +++ b/src/infrastructure/daemon/PhabricatorGarbageCollectorDaemon.php @@ -49,6 +49,7 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon { $n_parse = $this->collectParseCaches(); $n_markup = $this->collectMarkupCaches(); $n_tasks = $this->collectArchivedTasks(); + $n_cache = $this->collectGeneralCaches(); $collected = array( 'Herald Transcript' => $n_herald, @@ -56,6 +57,7 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon { 'Differential Parse Cache' => $n_parse, 'Markup Cache' => $n_markup, 'Archived Tasks' => $n_tasks, + 'General Cache Entries' => $n_cache, ); $collected = array_filter($collected); @@ -200,4 +202,25 @@ final class PhabricatorGarbageCollectorDaemon extends PhabricatorDaemon { return count($task_ids); } + + private function collectGeneralCaches() { + $key = 'gcdaemon.ttl.general-cache'; + $ttl = PhabricatorEnv::getEnvConfig($key); + if ($ttl <= 0) { + return 0; + } + + $cache = new PhabricatorKeyValueDatabaseCache(); + $conn_w = $cache->establishConnection('w'); + + queryfx( + $conn_w, + 'DELETE FROM %T WHERE cacheCreated < %d + ORDER BY cacheCreated ASC LIMIT 100', + $cache->getTableName(), + time() - $ttl); + + return $conn_w->getAffectedRows(); + } + } diff --git a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php index f692a9653f..13092ca3fc 100644 --- a/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php +++ b/src/infrastructure/storage/lisk/PhabricatorLiskDAO.php @@ -148,4 +148,63 @@ abstract class PhabricatorLiskDAO extends LiskDAO { protected function getConnectionNamespace() { return self::getStorageNamespace().'_'.$this->getApplicationName(); } + + + /** + * Break a list of escaped SQL statement fragments (e.g., VALUES lists for + * INSERT, previously built with @{function:qsprintf}) into chunks which will + * fit under the MySQL 'max_allowed_packet' limit. + * + * Chunks are glued together with `$glue`, by default ", ". + * + * If a statement is too large to fit within the limit, it is broken into + * its own chunk (but might fail when the query executes). + */ + public static function chunkSQL( + array $fragments, + $glue = ', ', + $limit = null) { + + if ($limit === null) { + // NOTE: Hard-code this at 1MB for now, minus a 10% safety buffer. + // Eventually we could query MySQL or let the user configure it. + $limit = (int)((1024 * 1024) * 0.90); + } + + $result = array(); + + $chunk = array(); + $len = 0; + $glue_len = strlen($glue); + foreach ($fragments as $fragment) { + $this_len = strlen($fragment); + + if ($chunk) { + // Chunks after the first also imply glue. + $this_len += $glue_len; + } + + if ($len + $this_len <= $limit) { + $len += $this_len; + $chunk[] = $fragment; + } else { + if ($chunk) { + $result[] = $chunk; + } + $len = strlen($fragment); + $chunk = array($fragment); + } + } + + if ($chunk) { + $result[] = $chunk; + } + + foreach ($result as $key => $fragment_list) { + $result[$key] = implode($glue, $fragment_list); + } + + return $result; + } + } diff --git a/src/infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php b/src/infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php new file mode 100644 index 0000000000..91e50a81dd --- /dev/null +++ b/src/infrastructure/storage/lisk/__tests__/LiskChunkTestCase.php @@ -0,0 +1,55 @@ +assertEqual( + array( + 'aa', + 'bb', + 'ccc', + 'dd', + 'e', + ), + PhabricatorLiskDAO::chunkSQL($fragments, '', 2)); + + + $fragments = array( + 'a', 'a', 'a', 'XX', 'a', 'a', 'a', 'a' + ); + + $this->assertEqual( + array( + 'a, a, a', + 'XX, a, a', + 'a, a', + ), + PhabricatorLiskDAO::chunkSQL($fragments, ', ', 8)); + + + $fragments = array( + 'xxxxxxxxxx', + 'yyyyyyyyyy', + 'a', 'b', 'c', + 'zzzzzzzzzz', + ); + + $this->assertEqual( + array( + 'xxxxxxxxxx', + 'yyyyyyyyyy', + 'a, b, c', + 'zzzzzzzzzz', + ), + PhabricatorLiskDAO::chunkSQL($fragments, ', ', 8)); + } + +} diff --git a/src/infrastructure/storage/patch/PhabricatorBuiltinPatchList.php b/src/infrastructure/storage/patch/PhabricatorBuiltinPatchList.php index 2d1cae1c24..458cbea77f 100644 --- a/src/infrastructure/storage/patch/PhabricatorBuiltinPatchList.php +++ b/src/infrastructure/storage/patch/PhabricatorBuiltinPatchList.php @@ -1060,6 +1060,10 @@ final class PhabricatorBuiltinPatchList extends PhabricatorSQLPatchList { 'type' => 'sql', 'name' => $this->getPatchPath('20121209.xmacromigratekey.sql'), ), + '20121220.generalcache.sql' => array( + 'type' => 'sql', + 'name' => $this->getPatchPath('20121220.generalcache.sql'), + ), ); }