2016-01-21 13:25:38 +01:00
|
|
|
<?php
|
|
|
|
|
|
|
|
abstract class PhabricatorFileUploadSource
|
|
|
|
extends Phobject {
|
|
|
|
|
|
|
|
private $name;
|
Make the Files "TTL" API more structured
Summary:
Ref T11357. When creating a file, callers can currently specify a `ttl`. However, it's ambiguous what you're supposed to pass, and some callers get it wrong.
For example, to mean "this file expires in 60 minutes", you might pass either of these:
- `time() + phutil_units('60 minutes in seconds')`
- `phutil_units('60 minutes in seconds')`
The former means "60 minutes from now". The latter means "1 AM, January 1, 1970". In practice, because the GC normally runs only once every four hours (at least, until recently), and all the bad TTLs are cases where files are normally accessed immediately, these 1970 TTLs didn't cause any real problems.
Split `ttl` into `ttl.relative` and `ttl.absolute`, and make sure the values are sane. Then correct all callers, and simplify out the `time()` calls where possible to make switching to `PhabricatorTime` easier.
Test Plan:
- Generated an SSH keypair.
- Viewed a changeset.
- Viewed a raw diff.
- Viewed a commit's file data.
- Viewed a temporary file's details, saw expiration date and relative time.
- Ran unit tests.
- (Didn't really test Phragment.)
Reviewers: chad
Reviewed By: chad
Subscribers: hach-que
Maniphest Tasks: T11357
Differential Revision: https://secure.phabricator.com/D17616
2017-04-04 20:01:43 +02:00
|
|
|
private $relativeTTL;
|
2016-01-21 13:25:38 +01:00
|
|
|
private $viewPolicy;
|
2018-01-29 03:47:42 +01:00
|
|
|
private $mimeType;
|
|
|
|
private $authorPHID;
|
2016-01-21 13:25:38 +01:00
|
|
|
|
|
|
|
private $rope;
|
|
|
|
private $data;
|
|
|
|
private $shouldChunk;
|
|
|
|
private $didRewind;
|
|
|
|
private $totalBytesWritten = 0;
|
2018-01-19 23:11:07 +01:00
|
|
|
private $totalBytesRead = 0;
|
|
|
|
private $byteLimit = 0;
|
2016-01-21 13:25:38 +01:00
|
|
|
|
|
|
|
/**
 * Set the name for the uploaded file.
 *
 * The value is passed through as the `name` parameter when the file is
 * created.
 *
 * @param string $name File name (presumably a string; not validated here).
 * @return $this
 */
public function setName($name) {
  $this->name = $name;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the name configured for the uploaded file.
 *
 * @return mixed The value given to setName(), or null if none was set.
 */
public function getName() {
  return $this->name;
}
|
|
|
|
|
Make the Files "TTL" API more structured
Summary:
Ref T11357. When creating a file, callers can currently specify a `ttl`. However, it's ambiguous what you're supposed to pass, and some callers get it wrong.
For example, to mean "this file expires in 60 minutes", you might pass either of these:
- `time() + phutil_units('60 minutes in seconds')`
- `phutil_units('60 minutes in seconds')`
The former means "60 minutes from now". The latter means "1 AM, January 1, 1970". In practice, because the GC normally runs only once every four hours (at least, until recently), and all the bad TTLs are cases where files are normally accessed immediately, these 1970 TTLs didn't cause any real problems.
Split `ttl` into `ttl.relative` and `ttl.absolute`, and make sure the values are sane. Then correct all callers, and simplify out the `time()` calls where possible to make switching to `PhabricatorTime` easier.
Test Plan:
- Generated an SSH keypair.
- Viewed a changeset.
- Viewed a raw diff.
- Viewed a commit's file data.
- Viewed a temporary file's details, saw expiration date and relative time.
- Ran unit tests.
- (Didn't really test Phragment.)
Reviewers: chad
Reviewed By: chad
Subscribers: hach-que
Maniphest Tasks: T11357
Differential Revision: https://secure.phabricator.com/D17616
2017-04-04 20:01:43 +02:00
|
|
|
/**
 * Set a relative time-to-live for the uploaded file.
 *
 * When non-null, the value is passed through as the `ttl.relative`
 * parameter at file creation time.
 *
 * @param int|null $relative_ttl Relative TTL (presumably a duration in
 *   seconds -- confirm against the file creation API).
 * @return $this
 */
public function setRelativeTTL($relative_ttl) {
  $this->relativeTTL = $relative_ttl;

  return $this;
}
|
|
|
|
|
Make the Files "TTL" API more structured
Summary:
Ref T11357. When creating a file, callers can currently specify a `ttl`. However, it's ambiguous what you're supposed to pass, and some callers get it wrong.
For example, to mean "this file expires in 60 minutes", you might pass either of these:
- `time() + phutil_units('60 minutes in seconds')`
- `phutil_units('60 minutes in seconds')`
The former means "60 minutes from now". The latter means "1 AM, January 1, 1970". In practice, because the GC normally runs only once every four hours (at least, until recently), and all the bad TTLs are cases where files are normally accessed immediately, these 1970 TTLs didn't cause any real problems.
Split `ttl` into `ttl.relative` and `ttl.absolute`, and make sure the values are sane. Then correct all callers, and simplify out the `time()` calls where possible to make switching to `PhabricatorTime` easier.
Test Plan:
- Generated an SSH keypair.
- Viewed a changeset.
- Viewed a raw diff.
- Viewed a commit's file data.
- Viewed a temporary file's details, saw expiration date and relative time.
- Ran unit tests.
- (Didn't really test Phragment.)
Reviewers: chad
Reviewed By: chad
Subscribers: hach-que
Maniphest Tasks: T11357
Differential Revision: https://secure.phabricator.com/D17616
2017-04-04 20:01:43 +02:00
|
|
|
/**
 * Get the relative time-to-live configured for the uploaded file.
 *
 * @return mixed The value given to setRelativeTTL(), or null if none was
 *   set.
 */
public function getRelativeTTL() {
  return $this->relativeTTL;
}
|
|
|
|
|
|
|
|
/**
 * Set the view policy for the uploaded file.
 *
 * The value is passed through as the `viewPolicy` parameter when the file
 * is created.
 *
 * @param mixed $view_policy Policy value (format not validated here).
 * @return $this
 */
public function setViewPolicy($view_policy) {
  $this->viewPolicy = $view_policy;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the view policy configured for the uploaded file.
 *
 * @return mixed The value given to setViewPolicy(), or null if none was
 *   set.
 */
public function getViewPolicy() {
  return $this->viewPolicy;
}
|
|
|
|
|
2018-01-19 23:11:07 +01:00
|
|
|
/**
 * Set the maximum number of bytes which may be read from the source.
 *
 * While reading, if the limit is nonzero and the running total of bytes
 * read exceeds it, a PhabricatorFileUploadSourceByteLimitException is
 * thrown. A falsy value (the default, 0) disables the check.
 *
 * @param int $byte_limit Maximum number of bytes, or 0 for no limit.
 * @return $this
 */
public function setByteLimit($byte_limit) {
  $this->byteLimit = $byte_limit;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the configured byte limit for this upload.
 *
 * @return int The value given to setByteLimit(); defaults to 0.
 */
public function getByteLimit() {
  return $this->byteLimit;
}
|
|
|
|
|
2018-01-29 03:47:42 +01:00
|
|
|
/**
 * Set an explicit MIME type for the uploaded file.
 *
 * When non-null, the value is passed through as the `mime-type` parameter
 * at file creation time.
 *
 * @param string|null $mime_type MIME type to record for the file.
 * @return $this
 */
public function setMIMEType($mime_type) {
  $this->mimeType = $mime_type;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the explicit MIME type configured for the uploaded file.
 *
 * @return mixed The value given to setMIMEType(), or null if none was set.
 */
public function getMIMEType() {
  return $this->mimeType;
}
|
|
|
|
|
|
|
|
/**
 * Set the author PHID for the uploaded file.
 *
 * When non-null, the value is passed through as the `authorPHID` parameter
 * at file creation time.
 *
 * @param string|null $author_phid PHID to record as the file's author.
 * @return $this
 */
public function setAuthorPHID($author_phid) {
  $this->authorPHID = $author_phid;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the author PHID configured for the uploaded file.
 *
 * @return mixed The value given to setAuthorPHID(), or null if none was
 *   set.
 */
public function getAuthorPHID() {
  return $this->authorPHID;
}
|
|
|
|
|
2016-01-21 13:25:38 +01:00
|
|
|
/**
 * Read the source data and store it as a file.
 *
 * Dispatches to the chunked writer when shouldChunkFile() says the data
 * should be chunked, and to the single-file writer otherwise.
 *
 * @return mixed The file object produced by the selected writer
 *   (presumably a PhabricatorFile).
 */
public function uploadFile() {
  if ($this->shouldChunkFile()) {
    return $this->writeChunkedFile();
  }

  return $this->writeSingleFile();
}
|
|
|
|
|
|
|
|
/**
 * Get the data iterator for this source, building it on first use.
 *
 * The iterator comes from newDataIterator() and is cached on the object so
 * that later reads continue from the same position.
 *
 * @return mixed The cached iterator.
 */
private function getDataIterator() {
  if ($this->data) {
    return $this->data;
  }

  $this->data = $this->newDataIterator();

  return $this->data;
}
|
|
|
|
|
|
|
|
/**
 * Get the rope used to buffer bytes read from the source, creating it on
 * first use.
 *
 * @return PhutilRope The shared buffer for this upload.
 */
private function getRope() {
  if ($this->rope) {
    return $this->rope;
  }

  $this->rope = new PhutilRope();

  return $this->rope;
}
|
|
|
|
|
|
|
|
/**
 * Build the iterator which yields the source data.
 *
 * This class calls `rewind()`, `valid()`, `next()` and `current()` on the
 * result, and treats each `current()` value as a string of bytes (it is
 * measured with `strlen()` and appended to the read buffer).
 *
 * @return mixed An iterator over byte strings (presumably an `Iterator`).
 */
abstract protected function newDataIterator();
|
|
|
|
/**
 * Get the total length of the source data, if it is known in advance.
 *
 * A `null` return means the length is unknown. In that case this class
 * reads data to decide whether to chunk, and records the final size of a
 * chunked file from the number of bytes actually written.
 *
 * @return int|null Length in bytes, or null if unknown.
 */
abstract protected function getDataLength();
|
|
|
|
|
|
|
|
/**
 * Read the next block of data from the source into the rope buffer.
 *
 * The first call rewinds the iterator; later calls advance it, but only
 * while it is still valid.
 *
 * @return bool True if a block was read and buffered, false once the
 *   iterator is exhausted.
 * @throws PhabricatorFileUploadSourceByteLimitException If a byte limit is
 *   set and the total number of bytes read exceeds it.
 */
private function readFileData() {
  $iterator = $this->getDataIterator();

  if ($this->didRewind) {
    // Never advance an iterator which has already run off the end.
    if ($iterator->valid()) {
      $iterator->next();
    }
  } else {
    $iterator->rewind();
    $this->didRewind = true;
  }

  if (!$iterator->valid()) {
    return false;
  }

  $bytes = $iterator->current();
  $this->totalBytesRead += strlen($bytes);

  // A byte limit of 0 means "no limit". The check runs before buffering,
  // so the block which crosses the limit is never appended.
  $limit = $this->byteLimit;
  if ($limit && ($this->totalBytesRead > $limit)) {
    throw new PhabricatorFileUploadSourceByteLimitException();
  }

  $this->getRope()->append($bytes);

  return true;
}
|
|
|
|
|
|
|
|
/**
 * Decide whether this upload should be stored as a chunked file.
 *
 * The decision is computed once and cached. If the source length is not
 * known in advance, this method reads data into the buffer until the
 * source is exhausted or the buffered size exceeds the chunk threshold.
 *
 * @return bool True if the data should be written in chunks.
 */
private function shouldChunkFile() {
  if ($this->shouldChunk !== null) {
    return $this->shouldChunk;
  }

  $threshold = PhabricatorFileStorageEngine::getChunkThreshold();

  // If there are no chunk engines available, we clearly can't chunk the
  // file.
  if ($threshold === null) {
    $this->shouldChunk = false;
    return $this->shouldChunk;
  }

  // If we don't know how large the file is, read data from it until we
  // can tell whether it is small or not. That is enough information to
  // make a decision about chunking.
  $length = $this->getDataLength();
  if ($length === null) {
    $buffer = $this->getRope();
    while ($this->readFileData()) {
      $length = $buffer->getByteLength();
      if ($length > $threshold) {
        break;
      }
    }
  }

  $this->shouldChunk = ($length > $threshold);

  return $this->shouldChunk;
}
|
|
|
|
|
|
|
|
/**
 * Store the source data as a single, unchunked file.
 *
 * Drains whatever remains of the source into the buffer, then creates the
 * file from the complete buffered contents.
 *
 * @return mixed The result of PhabricatorFile::newFromFileData().
 */
private function writeSingleFile() {
  // Read the entire file.
  do {
    $has_more = $this->readFileData();
  } while ($has_more);

  $contents = $this->getRope()->getAsString();

  return PhabricatorFile::newFromFileData(
    $contents,
    $this->getNewFileParameters());
}
|
|
|
|
|
|
|
|
/**
 * Store the source data as a chunked file.
 *
 * Creates and saves the chunked file record first, then streams the source
 * through the buffer, writing a chunk each time at least one chunk's worth
 * of bytes is buffered. When the source length was not known up front, the
 * final byte size is taken from the number of bytes actually written.
 *
 * @return PhabricatorFile The completed chunked file.
 */
private function writeChunkedFile() {
  $engine = $this->getChunkEngine();
  $parameters = $this->getNewFileParameters();

  // Use 0 as a placeholder length when the real length is unknown; it is
  // corrected after all chunks are written.
  $data_length = $this->getDataLength();
  $length = ($data_length !== null) ? $data_length : 0;

  $file = PhabricatorFile::newChunkedFile($engine, $length, $parameters);
  $file->saveAndIndex();

  $rope = $this->getRope();

  // Read the source, writing chunks as we get enough data.
  while ($this->readFileData()) {
    while ($rope->getByteLength() >= $engine->getChunkSize()) {
      $this->writeChunk($file, $engine);
    }
  }

  // If we have extra bytes at the end, write them. Note that it's possible
  // that we have more than one chunk of bytes left if the read was very
  // fast.
  while ($rope->getByteLength()) {
    $this->writeChunk($file, $engine);
  }

  $file->setIsPartial(0);
  if ($data_length === null) {
    $file->setByteSize($this->getTotalBytesWritten());
  }
  $file->save();

  return $file;
}
|
|
|
|
|
|
|
|
/**
 * Write one chunk of buffered data for a chunked file.
 *
 * Takes up to one chunk's worth of bytes from the head of the buffer,
 * stores them as a separate data file, records a chunk covering that byte
 * range, and advances the running count of bytes written.
 *
 * @param PhabricatorFile $file The chunked file being assembled.
 * @param PhabricatorFileStorageEngine $engine Engine providing the chunk
 *   size.
 * @return PhabricatorFileChunk The saved chunk.
 */
private function writeChunk(
  PhabricatorFile $file,
  PhabricatorFileStorageEngine $engine) {

  $start = $this->getTotalBytesWritten();
  $chunk_size = $engine->getChunkSize();
  $rope = $this->getRope();

  // The final chunk may be shorter than the chunk size, so measure what we
  // actually got rather than assuming a full chunk.
  $bytes = $rope->getPrefixBytes($chunk_size);
  $byte_count = strlen($bytes);
  $rope->removeBytesFromHead($byte_count);

  $end = $start + $byte_count;

  $params = array(
    'name' => $file->getMonogram().'.chunk-'.$start,
    'viewPolicy' => PhabricatorPolicies::POLICY_NOONE,
    'chunk' => true,
  );

  // If this isn't the initial chunk, provide a dummy MIME type so we do not
  // try to detect it. See T12857.
  if ($start > 0) {
    $params['mime-type'] = 'application/octet-stream';
  }

  $data_file = PhabricatorFile::newFromFileData($bytes, $params);

  $chunk = PhabricatorFileChunk::initializeNewChunk(
    $file->getStorageHandle(),
    $start,
    $end);

  $chunk
    ->setDataFilePHID($data_file->getPHID())
    ->save();

  $this->setTotalBytesWritten($end);

  return $chunk;
}
|
|
|
|
|
|
|
|
/**
 * Build the parameter map used to create the new file.
 *
 * `name` and `viewPolicy` are always present. `ttl.relative`, `mime-type`
 * and `authorPHID` are only included when the corresponding value has been
 * set to something other than null.
 *
 * @return array Map of file creation parameters.
 */
private function getNewFileParameters() {
  $parameters = array(
    'name' => $this->getName(),
    'viewPolicy' => $this->getViewPolicy(),
  );

  $ttl = $this->getRelativeTTL();
  if ($ttl !== null) {
    $parameters['ttl.relative'] = $ttl;
  }

  // Call the accessor by its declared name. The previous spelling,
  // getMimeType(), only resolved because PHP method names are
  // case-insensitive.
  $mime_type = $this->getMIMEType();
  if ($mime_type !== null) {
    $parameters['mime-type'] = $mime_type;
  }

  $author_phid = $this->getAuthorPHID();
  if ($author_phid !== null) {
    $parameters['authorPHID'] = $author_phid;
  }

  return $parameters;
}
|
|
|
|
|
|
|
|
/**
 * Select the storage engine used to write a chunked file.
 *
 * @return mixed The first writable chunk engine reported by
 *   PhabricatorFileStorageEngine.
 * @throws Exception If no writable chunk engine is available.
 */
private function getChunkEngine() {
  $engines = PhabricatorFileStorageEngine::loadWritableChunkEngines();

  if ($engines) {
    return head($engines);
  }

  throw new Exception(
    pht(
      'Unable to upload file: this server is not configured with any '.
      'storage engine which can store large files.'));
}
|
|
|
|
|
|
|
|
/**
 * Record the running total of bytes written out as chunks.
 *
 * The total is used as the starting offset of the next chunk.
 *
 * @param int $total_bytes_written New running total, in bytes.
 * @return $this
 */
private function setTotalBytesWritten($total_bytes_written) {
  $this->totalBytesWritten = $total_bytes_written;

  return $this;
}
|
|
|
|
|
|
|
|
/**
 * Get the running total of bytes written out as chunks.
 *
 * @return int Number of bytes written so far; starts at 0.
 */
private function getTotalBytesWritten() {
  return $this->totalBytesWritten;
}
|
|
|
|
|
|
|
|
}
|