From f7464400a51044dc6f6cdbfb1564532ff17c3797 Mon Sep 17 00:00:00 2001
From: epriestley
Date: Mon, 16 Dec 2013 12:37:32 -0800
Subject: [PATCH] Limit memory usage of `ssh-exec` during large pull
 operations

Summary:
Fixes T4241. Ref T4206. See T4241 for a description of the issue.

Generally, when we connect a fat pipe (`git-upload-pack`) to a narrow one
(`git` over SSH), we currently read an unbounded amount of data into memory.
Instead, throttle reads until writes catch up. This is now possible because
of the previous changes in this sequence.

Test Plan:
  - Ran `git clone` and `git push` on the entire Wine repository.
  - Observed CPU and memory usage.
  - Memory usage was constant and low; CPU usage was high only during I/O
    (which is expected, since we have to actually do work, although there
    might be room to further reduce this).

Reviewers: btrahan

Reviewed By: btrahan

CC: aran

Maniphest Tasks: T4241, T4206

Differential Revision: https://secure.phabricator.com/D7776
---
 .../ssh/PhabricatorSSHPassthruCommand.php | 48 +++++++++++++++++--
 1 file changed, 45 insertions(+), 3 deletions(-)

diff --git a/src/infrastructure/ssh/PhabricatorSSHPassthruCommand.php b/src/infrastructure/ssh/PhabricatorSSHPassthruCommand.php
index 5d06490b64..e05e0c7fae 100644
--- a/src/infrastructure/ssh/PhabricatorSSHPassthruCommand.php
+++ b/src/infrastructure/ssh/PhabricatorSSHPassthruCommand.php
@@ -97,6 +97,18 @@ final class PhabricatorSSHPassthruCommand extends Phobject {
 
     $channels = array($command_channel, $io_channel, $error_channel);
 
+    // We want to limit the amount of data we'll hold in memory for this
+    // process. See T4241 for a discussion of this issue in general.
+
+    $buffer_size = (1024 * 1024); // 1MB
+    $io_channel->setReadBufferSize($buffer_size);
+    $command_channel->setReadBufferSize($buffer_size);
+
+    // TODO: This just makes us throw away stderr after the first 1MB, but we
+    // don't currently have the support infrastructure to buffer it correctly.
+    // It's difficult to imagine this causing problems in practice, though.
+    $this->execFuture->setStderrSizeLimit($buffer_size);
+
     while (true) {
       PhutilChannel::waitForAny($channels);
 
@@ -104,7 +116,29 @@ final class PhabricatorSSHPassthruCommand extends Phobject {
       $command_channel->update();
       $error_channel->update();
 
-      $done = !$command_channel->isOpen();
+      // If any channel is blocked on the other end, wait for it to flush
+      // before we continue reading. For example, if a user is running
+      // `git clone` on a 1GB repository, the underlying `git-upload-pack`
+      // may be able to produce data much more quickly than we can send it
+      // over the network. If we don't throttle the reads, we may only send
+      // a few MB over the I/O channel in the time it takes to read the
+      // entire 1GB off the command channel, leaving 1GB of data in memory.
+
+      while ($command_channel->isOpen() &&
+             $io_channel->isOpenForWriting() &&
+             ($command_channel->getWriteBufferSize() >= $buffer_size ||
+              $io_channel->getWriteBufferSize() >= $buffer_size ||
+              $error_channel->getWriteBufferSize() >= $buffer_size)) {
+        PhutilChannel::waitForActivity(array(), $channels);
+        $io_channel->update();
+        $command_channel->update();
+        $error_channel->update();
+      }
+
+      // If the subprocess has exited and we've read everything from it,
+      // we're all done.
+      $done = !$command_channel->isOpenForReading() &&
+              $command_channel->isReadBufferEmpty();
 
       $in_message = $io_channel->read();
       if ($in_message !== null) {
@@ -133,12 +167,20 @@ final class PhabricatorSSHPassthruCommand extends Phobject {
 
       // If the client has disconnected, kill the subprocess and bail.
       if (!$io_channel->isOpenForWriting()) {
-        $this->execFuture->resolveKill();
+        $this->execFuture
+          ->setStdoutSizeLimit(0)
+          ->setStderrSizeLimit(0)
+          ->setReadBufferSize(null)
+          ->resolveKill();
         break;
       }
     }
 
-    list($err) = $this->execFuture->resolve();
+    list($err) = $this->execFuture
+      ->setStdoutSizeLimit(0)
+      ->setStderrSizeLimit(0)
+      ->setReadBufferSize(null)
+      ->resolve();
 
     return $err;
   }
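As an illustrative aside (not part of the patch), the throttling idea above
can be sketched with plain PHP streams. This is a minimal sketch under stated
assumptions: `bounded_pipe()` is a hypothetical helper name, `$src` and `$dst`
are assumed to be open stream resources (e.g. a `proc_open()` pipe from
`git-upload-pack` and the client socket), and it uses `stream_select()`
directly rather than Phabricator's `PhutilChannel` machinery. The core move is
the same as in the diff: once the in-memory buffer reaches the cap, stop
reading from the fast side until the slow side drains.

```php
<?php
// Hypothetical standalone sketch of the backpressure idea in this patch:
// cap the in-memory buffer and stop reading from the fast side until the
// slow side drains. Not Phabricator code; plain PHP streams only.
function bounded_pipe($src, $dst, $buffer_limit = 1048576) {
  stream_set_blocking($src, false);
  stream_set_blocking($dst, false);

  $buffer = '';
  $eof = false;
  while (true) {
    // Read only while the buffer is under the limit; this is the throttle.
    $read = (!$eof && strlen($buffer) < $buffer_limit)
      ? array($src)
      : array();
    $write = strlen($buffer) ? array($dst) : array();
    $except = null;

    if (!$read && !$write) {
      // Source exhausted and buffer fully flushed: we're done.
      break;
    }

    if (stream_select($read, $write, $except, null) === false) {
      break;
    }

    if ($read) {
      $chunk = fread($src, 65536);
      if ($chunk === false || $chunk === '') {
        // Select said "readable" but there was no data: check for EOF.
        $eof = feof($src);
      } else {
        $buffer .= $chunk;
      }
    }

    if ($write) {
      // Send as much as the destination will take, then drop it from the
      // buffer so the read side may resume.
      $written = fwrite($dst, $buffer);
      if ($written) {
        $buffer = substr($buffer, $written);
      }
    }
  }
}
```

For example, after a `proc_open()` of `git-upload-pack`, calling
`bounded_pipe($pipes[1], $client_socket)` would relay the pack data while
holding at most ~1MB in memory, mirroring the `$buffer_size` cap the patch
applies to the command and I/O channels.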