phorge-phorge/src/applications/files/engine/PhabricatorChunkedFileStorageEngine.php
Austin McKinley b54adc6161 Kick off indexing for File objects on creation
Summary: Ensures that newly-made `File` objects get indexed into the new ngrams index. Fixes T8788.

Test Plan:
  - uploaded a file with daemons stopped; confirmed no new rows in ngrams table
  - started daemons; confirmed indexing of previously-uploaded files happened
  - uploaded a new file with daemons running; confirmed it got added to the index

Not sure how to test the changes to `PhabricatorFileUploadSource->writeChunkedFile()` and `PhabricatorChunkedFileStorageEngine->allocateChunks()`. I spent a few minutes trying to find their callers, but the first looks like it requires a Diffusion repo and the 2nd is only accessible via Conduit. I can test that stuff if necessary, but it's such a small change that I'm not worried about it.

Reviewers: epriestley

Reviewed By: epriestley

Subscribers: Korvin

Maniphest Tasks: T8788

Differential Revision: https://secure.phabricator.com/D17718
2017-04-18 08:38:34 -07:00
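
The test plan above notes that `allocateChunks()` is only reachable through Conduit. As a minimal sketch, assuming the stock chunked-upload Conduit methods `file.allocate` and `file.uploadchunk` and an already-authenticated `ConduitClient` (the host, token, and file name are placeholders), the allocation path could be exercised like this:

  <?php
  // Hypothetical Conduit driver for a chunked upload; the method and
  // parameter names follow the stock file.allocate / file.uploadchunk
  // flow, but treat them as assumptions rather than part of this change.
  $client = new ConduitClient('https://phorge.example.com/api/');
  $client->setConduitToken('api-placeholder-token');

  $data = file_get_contents('large.bin');
  $length = strlen($data);

  // For large files, file.allocate routes storage to the chunk engine and
  // asks the client to upload the data in pieces.
  $result = $client->callMethodSynchronous(
    'file.allocate',
    array(
      'name' => 'large.bin',
      'contentLength' => $length,
    ));

  if ($result['upload'] && $result['filePHID']) {
    $chunk_size = 4 * 1024 * 1024;
    for ($at = 0; $at < $length; $at += $chunk_size) {
      $client->callMethodSynchronous(
        'file.uploadchunk',
        array(
          'filePHID' => $result['filePHID'],
          'byteStart' => $at,
          'dataEncoding' => 'base64',
          'data' => base64_encode(substr($data, $at, $chunk_size)),
        ));
    }
  }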


<?php

final class PhabricatorChunkedFileStorageEngine
  extends PhabricatorFileStorageEngine {

  public function getEngineIdentifier() {
    return 'chunks';
  }

  public function getEnginePriority() {
    return 60000;
  }

  /**
   * We can write chunks if we have at least one valid storage engine
   * underneath us.
   */
  public function canWriteFiles() {
    return (bool)$this->getWritableEngine();
  }

  public function hasFilesizeLimit() {
    return false;
  }

  public function isChunkEngine() {
    return true;
  }

  public function writeFile($data, array $params) {
    // The chunk engine does not support direct writes.
    throw new PhutilMethodNotImplementedException();
  }

  public function readFile($handle) {
    // This is inefficient, but makes the API work as expected.
    $chunks = $this->loadAllChunks($handle, true);

    $buffer = '';
    foreach ($chunks as $chunk) {
      $data_file = $chunk->getDataFile();
      if (!$data_file) {
        throw new Exception(pht('This file data is incomplete!'));
      }

      $buffer .= $chunk->getDataFile()->loadFileData();
    }

    return $buffer;
  }

  public function deleteFile($handle) {
    $engine = new PhabricatorDestructionEngine();
    $chunks = $this->loadAllChunks($handle, true);
    foreach ($chunks as $chunk) {
      $engine->destroyObject($chunk);
    }
  }

  private function loadAllChunks($handle, $need_files) {
    $chunks = id(new PhabricatorFileChunkQuery())
      ->setViewer(PhabricatorUser::getOmnipotentUser())
      ->withChunkHandles(array($handle))
      ->needDataFiles($need_files)
      ->execute();

    $chunks = msort($chunks, 'getByteStart');

    return $chunks;
  }

  /**
   * Compute a chunked file hash for the viewer.
   *
   * We can not currently compute a real hash for chunked file uploads
   * (because no process sees all of the file data).
   *
   * We also can not trust the hash that the user claims to have computed.
   * If we trust the user, they can upload some `evil.exe` and claim it has
   * the same file hash as `good.exe`. When another user later uploads the
   * real `good.exe`, we'll just create a reference to the existing
   * `evil.exe`. Users who download `good.exe` will then receive `evil.exe`.
   *
   * Instead, we rehash the user's claimed hash with account secrets. This
   * allows users to resume file uploads, but not collide with other users.
   *
   * Ideally, we'd like to be able to verify hashes, but this is complicated
   * and time consuming and gives us a fairly small benefit.
   *
   * @param PhabricatorUser Viewing user.
   * @param string Claimed file hash.
   * @return string Rehashed file hash.
   */
  public static function getChunkedHash(PhabricatorUser $viewer, $hash) {
    if (!$viewer->getPHID()) {
      throw new Exception(
        pht('Unable to compute chunked hash without real viewer!'));
    }

    $input = $viewer->getAccountSecret().':'.$hash.':'.$viewer->getPHID();
    return self::getChunkedHashForInput($input);
  }

  public static function getChunkedHashForInput($input) {
    $rehash = PhabricatorHash::weakDigest($input);

    // Add a suffix to identify this as a chunk hash.
    $rehash = substr($rehash, 0, -2).'-C';

    return $rehash;
  }
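
  // Illustrative sketch of the scheme above (the values are hypothetical):
  // a viewer who claims content hash "abcd1234..." for a resumable upload
  // never has that claimed hash stored directly. What gets stored is
  //
  //   getChunkedHashForInput($account_secret.':abcd1234...:'.$viewer_phid)
  //
  // that is, a weak digest of the secret-salted input with its last two
  // characters replaced by '-C', so two viewers claiming the same hash can
  // never be mapped onto each other's partial uploads.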

  public function allocateChunks($length, array $properties) {
    $file = PhabricatorFile::newChunkedFile($this, $length, $properties);

    $chunk_size = $this->getChunkSize();
    $handle = $file->getStorageHandle();

    $chunks = array();
    for ($ii = 0; $ii < $length; $ii += $chunk_size) {
      $chunks[] = PhabricatorFileChunk::initializeNewChunk(
        $handle,
        $ii,
        min($ii + $chunk_size, $length));
    }

    $file->openTransaction();
      foreach ($chunks as $chunk) {
        $chunk->save();
      }
      $file->saveAndIndex();
    $file->saveTransaction();

    return $file;
  }
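
  // For example, allocating a 10 MiB (10,485,760-byte) file with the 4 MiB
  // chunk size below creates three chunk rows covering the byte ranges
  // [0, 4194304), [4194304, 8388608), and [8388608, 10485760); the final
  // chunk is clamped to the file's total length.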

  /**
   * Find a storage engine which is suitable for storing chunks.
   *
   * This engine must be a writable engine, have a filesize limit larger than
   * the chunk limit, and must not be a chunk engine itself.
   */
  private function getWritableEngine() {
    // NOTE: We can't just load writable engines or we'll loop forever.
    $engines = parent::loadAllEngines();

    foreach ($engines as $engine) {
      if ($engine->isChunkEngine()) {
        continue;
      }

      if ($engine->isTestEngine()) {
        continue;
      }

      if (!$engine->canWriteFiles()) {
        continue;
      }

      if ($engine->hasFilesizeLimit()) {
        if ($engine->getFilesizeLimit() < $this->getChunkSize()) {
          continue;
        }
      }

      return true;
    }

    return false;
  }

  public function getChunkSize() {
    return (4 * 1024 * 1024);
  }

  public function getRawFileDataIterator(
    PhabricatorFile $file,
    $begin,
    $end,
    PhabricatorFileStorageFormat $format) {

    // NOTE: It is currently impossible for files stored with the chunk
    // engine to have their own formatting (instead, the individual chunks
    // are formatted), so we ignore the format object.
    $chunks = id(new PhabricatorFileChunkQuery())
      ->setViewer(PhabricatorUser::getOmnipotentUser())
      ->withChunkHandles(array($file->getStorageHandle()))
      ->withByteRange($begin, $end)
      ->needDataFiles(true)
      ->execute();

    return new PhabricatorFileChunkIterator($chunks, $begin, $end);
  }

}