From 877e7b63884d69745fcc1e695fdf1b86ccb2d7a4 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 27 May 2015 10:25:53 -0700 Subject: [PATCH] Move Conduit file upload logic into a separate class Summary: Ref T8259. Currently, `arc upload` uses new logic but `arc diff` uses older logic internally. This prevents `arc diff` from uploading files larger than 4MB to newer servers. Split the upload logic apart so the two upload pathways can share it. Callers now build a list of FileDataRefs and hand them to an Uploader for upload. Test Plan: Ran `arc upload` on: - One file. - Several files. - Small files. - Big files. - Directories. - Unreadable files. - Files full of random data. - The same file over and over again. - The same big file over and over again. - Artificially broke `file.allocate` and redid some of the simple cases (large/chunked aren't expected to work in this case). Reviewers: btrahan Reviewed By: btrahan Subscribers: joshuaspence, epriestley Maniphest Tasks: T8259 Differential Revision: https://secure.phabricator.com/D13016 --- src/__phutil_library_map__.php | 4 + src/upload/ArcanistFileDataRef.php | 289 ++++++++++++++++++++++++ src/upload/ArcanistFileUploader.php | 257 +++++++++++++++++++++ src/workflow/ArcanistUploadWorkflow.php | 84 ++----- 4 files changed, 568 insertions(+), 66 deletions(-) create mode 100644 src/upload/ArcanistFileDataRef.php create mode 100644 src/upload/ArcanistFileUploader.php diff --git a/src/__phutil_library_map__.php b/src/__phutil_library_map__.php index 5235f69a..24957b24 100644 --- a/src/__phutil_library_map__.php +++ b/src/__phutil_library_map__.php @@ -69,6 +69,8 @@ phutil_register_library_map(array( 'ArcanistExternalLinter' => 'lint/linter/ArcanistExternalLinter.php', 'ArcanistExternalLinterTestCase' => 'lint/linter/__tests__/ArcanistExternalLinterTestCase.php', 'ArcanistFeatureWorkflow' => 'workflow/ArcanistFeatureWorkflow.php', + 'ArcanistFileDataRef' => 'upload/ArcanistFileDataRef.php', + 'ArcanistFileUploader' => 'upload/ArcanistFileUploader.php', 'ArcanistFilenameLinter' => 'lint/linter/ArcanistFilenameLinter.php', 'ArcanistFilenameLinterTestCase' => 'lint/linter/__tests__/ArcanistFilenameLinterTestCase.php', 'ArcanistFlagWorkflow' => 'workflow/ArcanistFlagWorkflow.php', @@ -258,6 +260,8 @@ phutil_register_library_map(array( 'ArcanistExternalLinter' => 'ArcanistFutureLinter', 'ArcanistExternalLinterTestCase' => 'ArcanistLinterTestCase', 'ArcanistFeatureWorkflow' => 'ArcanistWorkflow', + 'ArcanistFileDataRef' => 'Phobject', + 'ArcanistFileUploader' => 'Phobject', 'ArcanistFilenameLinter' => 'ArcanistLinter', 'ArcanistFilenameLinterTestCase' => 'ArcanistLinterTestCase', 'ArcanistFlagWorkflow' => 'ArcanistWorkflow', diff --git a/src/upload/ArcanistFileDataRef.php b/src/upload/ArcanistFileDataRef.php new file mode 100644 index 00000000..053c6b57 --- /dev/null +++ b/src/upload/ArcanistFileDataRef.php @@ -0,0 +1,289 @@ +name = $name; + return $this; + } + + + /** + * @task config + */ + public function getName() { + return $this->name; + } + + + /** + * @task config + */ + public function setData($data) { + $this->data = $data; + return $this; + } + + + /** + * @task config + */ + public function getData() { + return $this->data; + } + + + /** + * @task config + */ + public function setPath($path) { + $this->path = $path; + return $this; + } + + + /** + * @task config + */ + public function getPath() { + return $this->path; + } + + +/* -( Handling Upload Results )-------------------------------------------- */ + + + /** + * @task results + */ + public function getErrors() { + return $this->errors; + } + + + /** + * @task results + */ + public function getPHID() { + return $this->phid; + } + + +/* -( Uploader API )------------------------------------------------------- */ + + + /** + * @task uploader + */ + public function willUpload() { + $have_data = ($this->data !== null); + $have_path = ($this->path !== null); + + if (!$have_data && !$have_path) { + throw new Exception( + pht( + 'Specify setData() or setPath() when building a file data '. + 'reference.')); + } + + if ($have_data && $have_path) { + throw new Exception( + pht( + 'Specify either setData() or setPath() when building a file data '. + 'reference, but not both.')); + } + + if ($have_path) { + $path = $this->path; + + if (!Filesystem::pathExists($path)) { + throw new Exception( + pht( + 'Unable to upload file: path "%s" does not exist.', + $path)); + } + + try { + Filesystem::assertIsFile($path); + } catch (FilesystemException $ex) { + throw new Exception( + pht( + 'Unable to upload file: path "%s" is not a file.', + $path)); + } + + try { + Filesystem::assertReadable($path); + } catch (FilesystemException $ex) { + throw new Exception( + pht( + 'Unable to upload file: path "%s" is not readable.', + $path)); + } + + $hash = @sha1_file($path); + if ($hash === false) { + throw new Exception( + pht( + 'Unable to upload file: failed to calculate file data hash for '. + 'path "%s".', + $path)); + } + + $size = @filesize($path); + if ($size === false) { + throw new Exception( + pht( + 'Unable to upload file: failed to determine filesize of '. + 'path "%s".', + $path)); + } + + $this->hash = $hash; + $this->size = $size; + } else { + $data = $this->data; + $this->hash = sha1($data); + $this->size = strlen($data); + } + } + + + /** + * @task uploader + */ + public function didFail($error) { + $this->errors[] = $error; + return $this; + } + + + /** + * @task uploader + */ + public function setPHID($phid) { + $this->phid = $phid; + return $this; + } + + + /** + * @task uploader + */ + public function getByteSize() { + if ($this->size === null) { + throw new PhutilInvalidStateException('willUpload'); + } + return $this->size; + } + + + /** + * @task uploader + */ + public function getContentHash() { + if ($this->size === null) { + throw new PhutilInvalidStateException('willUpload'); + } + return $this->hash; + } + + + /** + * @task uploader + */ + public function didUpload() { + if ($this->fileHandle) { + @fclose($this->fileHandle); + $this->fileHandle = null; + } + } + + + /** + * @task uploader + */ + public function readBytes($start, $end) { + if ($this->size === null) { + throw new PhutilInvalidStateException('willUpload'); + } + + $len = ($end - $start); + + if ($this->data !== null) { + return substr($this->data, $start, $len); + } + + $path = $this->path; + + if ($this->fileHandle === null) { + $f = @fopen($path, 'rb'); + if (!$f) { + throw new Exception( + pht( + 'Unable to upload file: failed to open path "%s" for reading.', + $path)); + } + $this->fileHandle = $f; + } + + $f = $this->fileHandle; + + $ok = @fseek($f, $start); + if ($ok !== 0) { + throw new Exception( + pht( + 'Unable to upload file: failed to fseek() to offset %d in file '. + 'at path "%s".', + $start, + $path)); + } + + $data = @fread($f, $len); + if ($data === false) { + throw new Exception( + pht( + 'Unable to upload file: failed to read %d bytes after offset %d '. + 'from file at path "%s".', + $len, + $start, + $path)); + } + + return $data; + } + +} diff --git a/src/upload/ArcanistFileUploader.php b/src/upload/ArcanistFileUploader.php new file mode 100644 index 00000000..a9104d7b --- /dev/null +++ b/src/upload/ArcanistFileUploader.php @@ -0,0 +1,257 @@ +setConduitClient($conduit); + * + * // Queue one or more files to be uploaded. + * $file = id(new ArcanistFileDataRef()) + * ->setName('example.jpg') + * ->setPath('/path/to/example.jpg'); + * $uploader->addFile($file); + * + * // Upload the files. + * $files = $uploader->uploadFiles(); + * + * For details about building file references, see @{class:ArcanistFileDataRef}. + * + * @task config Configuring the Uploader + * @task add Adding Files + * @task upload Uploading Files + * @task internal Internals + */ +final class ArcanistFileUploader extends Phobject { + + private $conduit; + private $files; + + +/* -( Configuring the Uploader )------------------------------------------- */ + + + /** + * @task config + */ + public function setConduitClient(ConduitClient $conduit) { + $this->conduit = $conduit; + return $this; + } + + +/* -( Adding Files )------------------------------------------------------- */ + + + /** + * @task add + */ + public function addFile(ArcanistFileDataRef $file) { + $this->files[] = $file; + return $this; + } + + +/* -( Uploading Files )---------------------------------------------------- */ + + + /** + * @task upload + */ + public function uploadFiles() { + if (!$this->conduit) { + throw new PhutilInvalidStateException('setConduitClient'); + } + + $files = $this->files; + foreach ($files as $key => $file) { + try { + $file->willUpload(); + } catch (Exception $ex) { + $file->didFail($ex->getMessage()); + unset($files[$key]); + } + } + + $conduit = $this->conduit; + $futures = array(); + foreach ($files as $key => $file) { + $futures[$key] = $conduit->callMethod( + 'file.allocate', + array( + 'name' => $file->getName(), + 'contentLength' => $file->getByteSize(), + 'contentHash' => $file->getContentHash(), + )); + } + + $iterator = id(new FutureIterator($futures))->limit(4); + $chunks = array(); + foreach ($iterator as $key => $future) { + try { + $result = $future->resolve(); + } catch (Exception $ex) { + // The most likely cause for a failure here is that the server does + // not support `file.allocate`. In this case, we'll try the older + // upload method below. + continue; + } + + $phid = $result['filePHID']; + $file = $files[$key]; + + // We don't need to upload any data. Figure out why not: this can either + // be because of an error (server can't accept the data) or because the + // server already has the data. + if (!$result['upload']) { + if (!$phid) { + $file->didFail( + pht( + 'Unable to upload file: the server refused to accept file '. + '"%s". This usually means it is too large.', + $file->getName())); + } else { + // These server completed the upload by creating a reference to known + // file data. We don't need to transfer the actual data, and are all + // set. + $file->setPHID($phid); + } + unset($files[$key]); + continue; + } + + // The server wants us to do an upload. + if ($phid) { + $chunks[$key] = array( + 'file' => $file, + 'phid' => $phid, + ); + } + } + + foreach ($chunks as $key => $chunk) { + $file = $chunk['file']; + $phid = $chunk['phid']; + try { + $this->uploadChunks($file, $phid); + $file->setPHID($phid); + } catch (Exception $ex) { + $file->didFail( + pht( + 'Unable to upload file chunks: %s', + $ex->getMessage())); + } + unset($files[$key]); + } + + foreach ($files as $key => $file) { + try { + $phid = $this->uploadData($file); + $file->setPHID($phid); + } catch (Exception $ex) { + $file->didFail( + pht( + 'Unable to upload file data: %s', + $ex->getMessage())); + } + unset($files[$key]); + } + + foreach ($this->files as $file) { + $file->didUpload(); + } + + return $this->files; + } + + +/* -( Internals )---------------------------------------------------------- */ + + + /** + * @task internal + */ + private function uploadChunks(ArcanistFileDataRef $file, $file_phid) { + $conduit = $this->conduit; + + $chunks = $conduit->callMethodSynchronous( + 'file.querychunks', + array( + 'filePHID' => $file_phid, + )); + + $remaining = array(); + foreach ($chunks as $chunk) { + if (!$chunk['complete']) { + $remaining[] = $chunk; + } + } + + $done = (count($chunks) - count($remaining)); + + if ($done) { + $this->writeStatus( + pht( + 'Resuming upload (%d of %d chunks remain).', + new PhutilNumber(count($remaining)), + new PhutilNumber(count($chunks)))); + } else { + $this->writeStatus( + pht( + 'Uploading chunks (%d chunks to upload).', + new PhutilNumber(count($remaining)))); + } + + $progress = new PhutilConsoleProgressBar(); + $progress->setTotal(count($chunks)); + + for ($ii = 0; $ii < $done; $ii++) { + $progress->update(1); + } + + $progress->draw(); + + // TODO: We could do these in parallel to improve upload performance. + foreach ($remaining as $chunk) { + $data = $file->readBytes($chunk['byteStart'], $chunk['byteEnd']); + + $conduit->callMethodSynchronous( + 'file.uploadchunk', + array( + 'filePHID' => $file_phid, + 'byteStart' => $chunk['byteStart'], + 'dataEncoding' => 'base64', + 'data' => base64_encode($data), + )); + + $progress->update(1); + } + } + + + /** + * @task internal + */ + private function uploadData(ArcanistFileDataRef $file) { + $conduit = $this->conduit; + + $data = $file->readBytes(0, $file->getByteSize()); + + return $conduit->callMethodSynchronous( + 'file.upload', + array( + 'name' => $file->getName(), + 'data_base64' => base64_encode($data), + )); + } + + + /** + * @task internal + */ + private function writeStatus($message) { + echo $message."\n"; + } + +} diff --git a/src/workflow/ArcanistUploadWorkflow.php b/src/workflow/ArcanistUploadWorkflow.php index b1c711cb..64c5260d 100644 --- a/src/workflow/ArcanistUploadWorkflow.php +++ b/src/workflow/ArcanistUploadWorkflow.php @@ -54,76 +54,28 @@ EOTEXT $conduit = $this->getConduit(); $results = array(); + $uploader = id(new ArcanistFileUploader()) + ->setConduitClient($conduit); + foreach ($this->paths as $path) { - $path = Filesystem::resolvePath($path); + $file = id(new ArcanistFileDataRef()) + ->setName(basename($path)) + ->setPath($path); - $name = basename($path); - $this->writeStatus(pht("Uploading '%s'...", $name)); + $uploader->addFile($file); + } - $hash = @sha1_file($path); - if (!$hash) { - throw new Exception(pht('Unable to read file "%s"!', $path)); - } - $length = filesize($path); - - $do_chunk_upload = false; - - $phid = null; - try { - $result = $conduit->callMethodSynchronous( - 'file.allocate', - array( - 'name' => $name, - 'contentLength' => $length, - 'contentHash' => $hash, - )); - - $phid = $result['filePHID']; - if (!$result['upload']) { - if (!$phid) { - $this->writeStatus( - pht( - 'Unable to upload file "%s": the server refused to accept '. - 'it. This usually means it is too large.', - $name)); - continue; - } - // Otherwise, the server completed the upload by referencing known - // file data. - } else { - if ($phid) { - $do_chunk_upload = true; - } else { - // This is a small file that doesn't need to be uploaded in - // chunks, so continue normally. - } - } - } catch (Exception $ex) { - $this->writeStatus( - pht('Unable to use allocate method, trying older upload method.')); - } - - if ($do_chunk_upload) { - $this->uploadChunks($phid, $path); - } - - if (!$phid) { - try { - $data = Filesystem::readFile($path); - } catch (FilesystemException $ex) { - $this->writeStatus( - pht('Unable to read file "%s".', $ex->getMessage())); - $results[$path] = null; - continue; - } - - $phid = $conduit->callMethodSynchronous( - 'file.upload', - array( - 'data_base64' => base64_encode($data), - 'name' => $name, - )); + $files = $uploader->uploadFiles(); + + $results = array(); + foreach ($files as $file) { + // TODO: This could be handled more gracefully; just preserving behavior + // until we introduce `file.query` and modernize this. + if ($file->getErrors()) { + throw new Exception(implode("\n", $file->getErrors())); } + $phid = $file->getPHID(); + $name = $file->getName(); $info = $conduit->callMethodSynchronous( 'file.info',