1
0
Fork 0
mirror of https://we.phorge.it/source/arcanist.git synced 2025-01-23 13:08:17 +01:00

Move Conduit file upload logic into a separate class

Summary:
Ref T8259. Currently, `arc upload` uses new logic but `arc diff` uses older logic internally. This prevents `arc diff` from uploading files larger than 4MB to newer servers.

Split the upload logic apart so the two upload pathways can share it. Callers now build a list of FileDataRefs and hand them to an Uploader for upload.

Test Plan:
Ran `arc upload` on:

  - One file.
  - Several files.
  - Small files.
  - Big files.
  - Directories.
  - Unreadable files.
  - Files full of random data.
  - The same file over and over again.
  - The same big file over and over again.
  - Artificially broke `file.allocate` and redid some of the simple cases (large/chunked aren't expected to work in this case).

Reviewers: btrahan

Reviewed By: btrahan

Subscribers: joshuaspence, epriestley

Maniphest Tasks: T8259

Differential Revision: https://secure.phabricator.com/D13016
This commit is contained in:
epriestley 2015-05-27 10:25:53 -07:00
parent 407af00ef6
commit 877e7b6388
4 changed files with 568 additions and 66 deletions

View file

@ -69,6 +69,8 @@ phutil_register_library_map(array(
'ArcanistExternalLinter' => 'lint/linter/ArcanistExternalLinter.php',
'ArcanistExternalLinterTestCase' => 'lint/linter/__tests__/ArcanistExternalLinterTestCase.php',
'ArcanistFeatureWorkflow' => 'workflow/ArcanistFeatureWorkflow.php',
'ArcanistFileDataRef' => 'upload/ArcanistFileDataRef.php',
'ArcanistFileUploader' => 'upload/ArcanistFileUploader.php',
'ArcanistFilenameLinter' => 'lint/linter/ArcanistFilenameLinter.php',
'ArcanistFilenameLinterTestCase' => 'lint/linter/__tests__/ArcanistFilenameLinterTestCase.php',
'ArcanistFlagWorkflow' => 'workflow/ArcanistFlagWorkflow.php',
@ -258,6 +260,8 @@ phutil_register_library_map(array(
'ArcanistExternalLinter' => 'ArcanistFutureLinter',
'ArcanistExternalLinterTestCase' => 'ArcanistLinterTestCase',
'ArcanistFeatureWorkflow' => 'ArcanistWorkflow',
'ArcanistFileDataRef' => 'Phobject',
'ArcanistFileUploader' => 'Phobject',
'ArcanistFilenameLinter' => 'ArcanistLinter',
'ArcanistFilenameLinterTestCase' => 'ArcanistLinterTestCase',
'ArcanistFlagWorkflow' => 'ArcanistWorkflow',

View file

@ -0,0 +1,289 @@
<?php
/**
* Reference to a file or block of file data which can be uploaded using
* @{class:ArcanistFileUploader}.
*
* You can either upload a file on disk by using @{method:setPath}, or upload
* a block of data in memory by using @{method:setData}.
*
* For usage examples, see @{class:ArcanistFileUploader}.
*
* After uploading, successful uploads will have @{method:getPHID} populated.
* Failed uploads will have @{method:getErrors} populated with a description
* of reasons for failure.
*
* @task config Configuring File References
* @task results Handling Upload Results
* @task uploader Uploader API
*/
final class ArcanistFileDataRef extends Phobject {
private $name;
private $data;
private $path;
private $hash;
private $size;
private $errors = array();
private $phid;
private $fileHandle;
/* -( Configuring File References )---------------------------------------- */
/**
* @task config
*/
public function setName($name) {
$this->name = $name;
return $this;
}
/**
* @task config
*/
public function getName() {
return $this->name;
}
/**
* @task config
*/
public function setData($data) {
$this->data = $data;
return $this;
}
/**
* @task config
*/
public function getData() {
return $this->data;
}
/**
* @task config
*/
public function setPath($path) {
$this->path = $path;
return $this;
}
/**
* @task config
*/
public function getPath() {
return $this->path;
}
/* -( Handling Upload Results )-------------------------------------------- */
/**
* @task results
*/
public function getErrors() {
return $this->errors;
}
/**
* @task results
*/
public function getPHID() {
return $this->phid;
}
/* -( Uploader API )------------------------------------------------------- */
/**
* @task uploader
*/
public function willUpload() {
$have_data = ($this->data !== null);
$have_path = ($this->path !== null);
if (!$have_data && !$have_path) {
throw new Exception(
pht(
'Specify setData() or setPath() when building a file data '.
'reference.'));
}
if ($have_data && $have_path) {
throw new Exception(
pht(
'Specify either setData() or setPath() when building a file data '.
'reference, but not both.'));
}
if ($have_path) {
$path = $this->path;
if (!Filesystem::pathExists($path)) {
throw new Exception(
pht(
'Unable to upload file: path "%s" does not exist.',
$path));
}
try {
Filesystem::assertIsFile($path);
} catch (FilesystemException $ex) {
throw new Exception(
pht(
'Unable to upload file: path "%s" is not a file.',
$path));
}
try {
Filesystem::assertReadable($path);
} catch (FilesystemException $ex) {
throw new Exception(
pht(
'Unable to upload file: path "%s" is not readable.',
$path));
}
$hash = @sha1_file($path);
if ($hash === false) {
throw new Exception(
pht(
'Unable to upload file: failed to calculate file data hash for '.
'path "%s".',
$path));
}
$size = @filesize($path);
if ($size === false) {
throw new Exception(
pht(
'Unable to upload file: failed to determine filesize of '.
'path "%s".',
$path));
}
$this->hash = $hash;
$this->size = $size;
} else {
$data = $this->data;
$this->hash = sha1($data);
$this->size = strlen($data);
}
}
/**
* @task uploader
*/
public function didFail($error) {
$this->errors[] = $error;
return $this;
}
/**
* @task uploader
*/
public function setPHID($phid) {
$this->phid = $phid;
return $this;
}
/**
* @task uploader
*/
public function getByteSize() {
if ($this->size === null) {
throw new PhutilInvalidStateException('willUpload');
}
return $this->size;
}
/**
* @task uploader
*/
public function getContentHash() {
if ($this->size === null) {
throw new PhutilInvalidStateException('willUpload');
}
return $this->hash;
}
/**
* @task uploader
*/
public function didUpload() {
if ($this->fileHandle) {
@fclose($this->fileHandle);
$this->fileHandle = null;
}
}
/**
* @task uploader
*/
public function readBytes($start, $end) {
if ($this->size === null) {
throw new PhutilInvalidStateException('willUpload');
}
$len = ($end - $start);
if ($this->data !== null) {
return substr($this->data, $start, $len);
}
$path = $this->path;
if ($this->fileHandle === null) {
$f = @fopen($path, 'rb');
if (!$f) {
throw new Exception(
pht(
'Unable to upload file: failed to open path "%s" for reading.',
$path));
}
$this->fileHandle = $f;
}
$f = $this->fileHandle;
$ok = @fseek($f, $start);
if ($ok !== 0) {
throw new Exception(
pht(
'Unable to upload file: failed to fseek() to offset %d in file '.
'at path "%s".',
$start,
$path));
}
$data = @fread($f, $len);
if ($data === false) {
throw new Exception(
pht(
'Unable to upload file: failed to read %d bytes after offset %d '.
'from file at path "%s".',
$len,
$start,
$path));
}
return $data;
}
}

View file

@ -0,0 +1,257 @@
<?php
/**
* Upload a list of @{class:ArcanistFileDataRef} objects over Conduit.
*
* // Create a new uploader.
* $uploader = id(new ArcanistFileUploader())
* ->setConduitClient($conduit);
*
* // Queue one or more files to be uploaded.
* $file = id(new ArcanistFileDataRef())
* ->setName('example.jpg')
* ->setPath('/path/to/example.jpg');
* $uploader->addFile($file);
*
* // Upload the files.
* $files = $uploader->uploadFiles();
*
* For details about building file references, see @{class:ArcanistFileDataRef}.
*
* @task config Configuring the Uploader
* @task add Adding Files
* @task upload Uploading Files
* @task internal Internals
*/
final class ArcanistFileUploader extends Phobject {
private $conduit;
private $files;
/* -( Configuring the Uploader )------------------------------------------- */
/**
* @task config
*/
public function setConduitClient(ConduitClient $conduit) {
$this->conduit = $conduit;
return $this;
}
/* -( Adding Files )------------------------------------------------------- */
/**
* @task add
*/
public function addFile(ArcanistFileDataRef $file) {
$this->files[] = $file;
return $this;
}
/* -( Uploading Files )---------------------------------------------------- */
/**
* @task upload
*/
public function uploadFiles() {
if (!$this->conduit) {
throw new PhutilInvalidStateException('setConduitClient');
}
$files = $this->files;
foreach ($files as $key => $file) {
try {
$file->willUpload();
} catch (Exception $ex) {
$file->didFail($ex->getMessage());
unset($files[$key]);
}
}
$conduit = $this->conduit;
$futures = array();
foreach ($files as $key => $file) {
$futures[$key] = $conduit->callMethod(
'file.allocate',
array(
'name' => $file->getName(),
'contentLength' => $file->getByteSize(),
'contentHash' => $file->getContentHash(),
));
}
$iterator = id(new FutureIterator($futures))->limit(4);
$chunks = array();
foreach ($iterator as $key => $future) {
try {
$result = $future->resolve();
} catch (Exception $ex) {
// The most likely cause for a failure here is that the server does
// not support `file.allocate`. In this case, we'll try the older
// upload method below.
continue;
}
$phid = $result['filePHID'];
$file = $files[$key];
// We don't need to upload any data. Figure out why not: this can either
// be because of an error (server can't accept the data) or because the
// server already has the data.
if (!$result['upload']) {
if (!$phid) {
$file->didFail(
pht(
'Unable to upload file: the server refused to accept file '.
'"%s". This usually means it is too large.',
$file->getName()));
} else {
// These server completed the upload by creating a reference to known
// file data. We don't need to transfer the actual data, and are all
// set.
$file->setPHID($phid);
}
unset($files[$key]);
continue;
}
// The server wants us to do an upload.
if ($phid) {
$chunks[$key] = array(
'file' => $file,
'phid' => $phid,
);
}
}
foreach ($chunks as $key => $chunk) {
$file = $chunk['file'];
$phid = $chunk['phid'];
try {
$this->uploadChunks($file, $phid);
$file->setPHID($phid);
} catch (Exception $ex) {
$file->didFail(
pht(
'Unable to upload file chunks: %s',
$ex->getMessage()));
}
unset($files[$key]);
}
foreach ($files as $key => $file) {
try {
$phid = $this->uploadData($file);
$file->setPHID($phid);
} catch (Exception $ex) {
$file->didFail(
pht(
'Unable to upload file data: %s',
$ex->getMessage()));
}
unset($files[$key]);
}
foreach ($this->files as $file) {
$file->didUpload();
}
return $this->files;
}
/* -( Internals )---------------------------------------------------------- */
/**
* @task internal
*/
private function uploadChunks(ArcanistFileDataRef $file, $file_phid) {
$conduit = $this->conduit;
$chunks = $conduit->callMethodSynchronous(
'file.querychunks',
array(
'filePHID' => $file_phid,
));
$remaining = array();
foreach ($chunks as $chunk) {
if (!$chunk['complete']) {
$remaining[] = $chunk;
}
}
$done = (count($chunks) - count($remaining));
if ($done) {
$this->writeStatus(
pht(
'Resuming upload (%d of %d chunks remain).',
new PhutilNumber(count($remaining)),
new PhutilNumber(count($chunks))));
} else {
$this->writeStatus(
pht(
'Uploading chunks (%d chunks to upload).',
new PhutilNumber(count($remaining))));
}
$progress = new PhutilConsoleProgressBar();
$progress->setTotal(count($chunks));
for ($ii = 0; $ii < $done; $ii++) {
$progress->update(1);
}
$progress->draw();
// TODO: We could do these in parallel to improve upload performance.
foreach ($remaining as $chunk) {
$data = $file->readBytes($chunk['byteStart'], $chunk['byteEnd']);
$conduit->callMethodSynchronous(
'file.uploadchunk',
array(
'filePHID' => $file_phid,
'byteStart' => $chunk['byteStart'],
'dataEncoding' => 'base64',
'data' => base64_encode($data),
));
$progress->update(1);
}
}
/**
* @task internal
*/
private function uploadData(ArcanistFileDataRef $file) {
$conduit = $this->conduit;
$data = $file->readBytes(0, $file->getByteSize());
return $conduit->callMethodSynchronous(
'file.upload',
array(
'name' => $file->getName(),
'data_base64' => base64_encode($data),
));
}
/**
* @task internal
*/
private function writeStatus($message) {
echo $message."\n";
}
}

View file

@ -54,76 +54,28 @@ EOTEXT
$conduit = $this->getConduit();
$results = array();
$uploader = id(new ArcanistFileUploader())
->setConduitClient($conduit);
foreach ($this->paths as $path) {
$path = Filesystem::resolvePath($path);
$file = id(new ArcanistFileDataRef())
->setName(basename($path))
->setPath($path);
$name = basename($path);
$this->writeStatus(pht("Uploading '%s'...", $name));
$uploader->addFile($file);
}
$hash = @sha1_file($path);
if (!$hash) {
throw new Exception(pht('Unable to read file "%s"!', $path));
}
$length = filesize($path);
$do_chunk_upload = false;
$phid = null;
try {
$result = $conduit->callMethodSynchronous(
'file.allocate',
array(
'name' => $name,
'contentLength' => $length,
'contentHash' => $hash,
));
$phid = $result['filePHID'];
if (!$result['upload']) {
if (!$phid) {
$this->writeStatus(
pht(
'Unable to upload file "%s": the server refused to accept '.
'it. This usually means it is too large.',
$name));
continue;
}
// Otherwise, the server completed the upload by referencing known
// file data.
} else {
if ($phid) {
$do_chunk_upload = true;
} else {
// This is a small file that doesn't need to be uploaded in
// chunks, so continue normally.
}
}
} catch (Exception $ex) {
$this->writeStatus(
pht('Unable to use allocate method, trying older upload method.'));
}
if ($do_chunk_upload) {
$this->uploadChunks($phid, $path);
}
if (!$phid) {
try {
$data = Filesystem::readFile($path);
} catch (FilesystemException $ex) {
$this->writeStatus(
pht('Unable to read file "%s".', $ex->getMessage()));
$results[$path] = null;
continue;
}
$phid = $conduit->callMethodSynchronous(
'file.upload',
array(
'data_base64' => base64_encode($data),
'name' => $name,
));
$files = $uploader->uploadFiles();
$results = array();
foreach ($files as $file) {
// TODO: This could be handled more gracefully; just preserving behavior
// until we introduce `file.query` and modernize this.
if ($file->getErrors()) {
throw new Exception(implode("\n", $file->getErrors()));
}
$phid = $file->getPHID();
$name = $file->getName();
$info = $conduit->callMethodSynchronous(
'file.info',