diff --git a/src/parser/ArcanistBundle.php b/src/parser/ArcanistBundle.php index 91ccb541..9bc003b3 100644 --- a/src/parser/ArcanistBundle.php +++ b/src/parser/ArcanistBundle.php @@ -226,7 +226,9 @@ final class ArcanistBundle { } $result[] = 'Index: '.$index_path; + $result[] = PHP_EOL; $result[] = str_repeat('=', 67); + $result[] = PHP_EOL; if ($old_path === null) { $old_path = '/dev/null'; @@ -247,8 +249,8 @@ final class ArcanistBundle { $old_path = $cur_path; } - $result[] = '--- '.$old_path; - $result[] = '+++ '.$cur_path; + $result[] = '--- '.$old_path.PHP_EOL; + $result[] = '+++ '.$cur_path.PHP_EOL; $result[] = $hunk_changes; } @@ -257,7 +259,7 @@ final class ArcanistBundle { return ''; } - $diff = implode("\n", $result)."\n"; + $diff = implode('', $result); return $this->convertNonUTF8Diff($diff); } @@ -383,45 +385,45 @@ final class ArcanistBundle { $cur_target = 'b/'.$cur_path; } - $result[] = "diff --git {$old_index} {$cur_index}"; + $result[] = "diff --git {$old_index} {$cur_index}".PHP_EOL; if ($type == ArcanistDiffChangeType::TYPE_ADD) { - $result[] = "new file mode {$new_mode}"; + $result[] = "new file mode {$new_mode}".PHP_EOL; } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE || $type == ArcanistDiffChangeType::TYPE_MOVE_HERE || $type == ArcanistDiffChangeType::TYPE_COPY_AWAY) { if ($old_mode !== $new_mode) { - $result[] = "old mode {$old_mode}"; - $result[] = "new mode {$new_mode}"; + $result[] = "old mode {$old_mode}".PHP_EOL; + $result[] = "new mode {$new_mode}".PHP_EOL; } } if ($type == ArcanistDiffChangeType::TYPE_COPY_HERE) { - $result[] = "copy from {$old_path}"; - $result[] = "copy to {$cur_path}"; + $result[] = "copy from {$old_path}".PHP_EOL; + $result[] = "copy to {$cur_path}".PHP_EOL; } else if ($type == ArcanistDiffChangeType::TYPE_MOVE_HERE) { - $result[] = "rename from {$old_path}"; - $result[] = "rename to {$cur_path}"; + $result[] = "rename from {$old_path}".PHP_EOL; + $result[] = "rename to {$cur_path}".PHP_EOL; } else if ($type == ArcanistDiffChangeType::TYPE_DELETE || $type == ArcanistDiffChangeType::TYPE_MULTICOPY) { $old_mode = idx($change->getOldProperties(), 'unix:filemode'); if ($old_mode) { - $result[] = "deleted file mode {$old_mode}"; + $result[] = "deleted file mode {$old_mode}".PHP_EOL; } } if ($change_body) { if (!$is_binary) { - $result[] = "--- {$old_target}"; - $result[] = "+++ {$cur_target}"; + $result[] = "--- {$old_target}".PHP_EOL; + $result[] = "+++ {$cur_target}".PHP_EOL; } $result[] = $change_body; } } - $diff = implode("\n", $result)."\n"; + $diff = implode('', $result).PHP_EOL; return $this->convertNonUTF8Diff($diff); } @@ -440,7 +442,7 @@ final class ArcanistBundle { $context = 3; $results = array(); - $lines = explode("\n", $base_hunk->getCorpus()); + $lines = phutil_split_lines($base_hunk->getCorpus()); $n = count($lines); $old_offset = $base_hunk->getOldOffset(); @@ -530,7 +532,7 @@ final class ArcanistBundle { $hunk->setNewLength($count_length - $old_lines); $corpus = array_slice($lines, $hunk_start, $hunk_length); - $corpus = implode("\n", $corpus); + $corpus = implode('', $corpus); $hunk->setCorpus($corpus); $results[] = $hunk; @@ -596,11 +598,11 @@ final class ArcanistBundle { $n_head = "{$n_off},{$n_len}"; } - $result[] = "@@ -{$o_head} +{$n_head} @@"; + $result[] = "@@ -{$o_head} +{$n_head} @@".PHP_EOL; $result[] = $corpus; } } - return implode("\n", $result); + return implode('', $result); } public function setLoadFileDataCallback($callback) { @@ -656,16 +658,16 @@ final class ArcanistBundle { } $content = array(); - $content[] = "index {$old_sha1}..{$new_sha1}"; - $content[] = "GIT binary patch"; + $content[] = "index {$old_sha1}..{$new_sha1}".PHP_EOL; + $content[] = "GIT binary patch".PHP_EOL; - $content[] = "literal {$new_length}"; - $content[] = $this->emitBinaryDiffBody($new_data); + $content[] = "literal {$new_length}".PHP_EOL; + $content[] = $this->emitBinaryDiffBody($new_data).PHP_EOL; - $content[] = "literal {$old_length}"; - $content[] = $this->emitBinaryDiffBody($old_data); + $content[] = "literal {$old_length}".PHP_EOL; + $content[] = $this->emitBinaryDiffBody($old_data).PHP_EOL; - return implode("\n", $content); + return implode('', $content); } private function emitBinaryDiffBody($data) { @@ -691,11 +693,9 @@ final class ArcanistBundle { $buf .= chr($len - 26 + ord('a') - 1); } $buf .= self::encodeBase85($line); - $buf .= "\n"; + $buf .= PHP_EOL; } - $buf .= "\n"; - return $buf; } diff --git a/src/parser/ArcanistDiffParser.php b/src/parser/ArcanistDiffParser.php index fcada2a0..ae582815 100644 --- a/src/parser/ArcanistDiffParser.php +++ b/src/parser/ArcanistDiffParser.php @@ -235,10 +235,11 @@ final class ArcanistDiffParser { '(?Pdiff -r) (?P[a-f0-9]+) (?:-r [a-f0-9]+ )?(?P.+)', ); - $line = $this->getLine(); + $line = $this->getLineTrimmed(); $match = null; $ok = $this->tryMatchHeader($patterns, $line, $match); + $failed_parse = false; if (!$ok && $this->isFirstNonEmptyLine()) { // 'hg export' command creates so called "extended diff" that // contains some meta information and comment at the beginning @@ -250,13 +251,19 @@ final class ArcanistDiffParser { if (!$this->tryMatchHeader($patterns, $line, $match)) { // Restore line before guessing to display correct error. $this->restoreLine(); - $this->didFailParse( - "Expected a hunk header, like 'Index: /path/to/file.ext' (svn), ". - "'Property changes on: /path/to/file.ext' (svn properties), ". - "'commit 59bcc3ad6775562f845953cf01624225' (git show), ". - "'diff --git' (git diff), '--- filename' (unified diff), or " . - "'diff -r' (hg diff or patch)."); + $failed_parse = true; } + } else if (!$ok) { + $failed_parse = true; + } + + if ($failed_parse) { + $this->didFailParse( + "Expected a hunk header, like 'Index: /path/to/file.ext' (svn), ". + "'Property changes on: /path/to/file.ext' (svn properties), ". + "'commit 59bcc3ad6775562f845953cf01624225' (git show), ". + "'diff --git' (git diff), '--- filename' (unified diff), or " . + "'diff -r' (hg diff or patch)."); } if (isset($match['type'])) { @@ -361,15 +368,17 @@ final class ArcanistDiffParser { $this->didFailParse("Expected 'Date:'."); } - while (($line = $this->nextLine()) !== null) { + while (($line = $this->nextLineTrimmed()) !== null) { if (strlen($line) && $line[0] != ' ') { break; } - // Strip leading spaces from Git commit messages. - $message[] = substr($line, 4); + + // Strip leading spaces from Git commit messages. Note that empty lines + // are represented as just "\n"; don't touch those. + $message[] = preg_replace('/^ /', '', $this->getLine()); } - $message = rtrim(implode("\n", $message)); + $message = rtrim(implode('', $message), "\r\n"); $change->setMetadata('message', $message); } @@ -472,8 +481,8 @@ final class ArcanistDiffParser { $line = $this->nextLine(); } - $old = rtrim(implode("\n", $old)); - $new = rtrim(implode("\n", $new)); + $old = rtrim(implode('', $old)); + $new = rtrim(implode('', $new)); if (!strlen($old)) { $old = null; @@ -754,7 +763,7 @@ final class ArcanistDiffParser { $this->didFailParse("Expected 'literal NNNN' to start git binary patch."); } do { - $line = $this->nextLine(); + $line = $this->nextLineTrimmed(); if ($line === '' || $line === null) { // Some versions of Mercurial apparently omit the terminal newline, // although it's unclear if Git will ever do this. In either case, @@ -800,7 +809,7 @@ final class ArcanistDiffParser { $all_changes = array(); do { $hunk = new ArcanistDiffHunk(); - $line = $this->getLine(); + $line = $this->getLineTrimmed(); $real = array(); // In the case where only one line is changed, the length is omitted. @@ -896,6 +905,8 @@ final class ArcanistDiffParser { --$new_len; $real[] = $line; break; + case "\r": + case "\n": case '~': $advance = true; break 2; @@ -908,7 +919,7 @@ final class ArcanistDiffParser { $this->didFailParse("Found the wrong number of hunk lines."); } - $corpus = implode("\n", $real); + $corpus = implode('', $real); $is_binary = false; if ($this->detectBinaryFiles) { @@ -1007,13 +1018,7 @@ final class ArcanistDiffParser { $text = preg_replace('/'.$ansi_color_pattern.'/', '', $text); } - // TODO: This is a hack for SVN + Windows. Eventually, we should retain line - // endings and preserve them through the patch lifecycle or something along - // those lines. - - // NOTE: On Windows, a diff may have \n delimited lines (because the file - // uses \n) but \r\n delimited blocks. Split on both. - $this->text = preg_split('/\r?\n/', $text); + $this->text = phutil_split_lines($text); $this->line = 0; } @@ -1027,11 +1032,27 @@ final class ArcanistDiffParser { return null; } + protected function getLineTrimmed() { + $line = $this->getLine(); + if ($line !== null) { + $line = trim($line, "\r\n"); + } + return $line; + } + protected function nextLine() { $this->line++; return $this->getLine(); } + protected function nextLineTrimmed() { + $line = $this->nextLine(); + if ($line !== null) { + $line = trim($line, "\r\n"); + } + return $line; + } + protected function nextNonemptyLine() { while (($line = $this->nextLine()) !== null) { if (strlen(trim($line)) !== 0) { @@ -1079,14 +1100,14 @@ final class ArcanistDiffParser { } protected function didFailParse($message) { - $context = 3; + $context = 5; $min = max(0, $this->line - $context); $max = min($this->line + $context, count($this->text) - 1); $context = ''; for ($ii = $min; $ii <= $max; $ii++) { $context .= sprintf( - "%8.8s %6.6s %s\n", + "%8.8s %6.6s %s", ($ii == $this->line) ? '>>> ' : '', $ii + 1, $this->text[$ii]); diff --git a/src/parser/__tests__/ArcanistDiffParserTestCase.php b/src/parser/__tests__/ArcanistDiffParserTestCase.php index 025dbae4..3a6b785d 100644 --- a/src/parser/__tests__/ArcanistDiffParserTestCase.php +++ b/src/parser/__tests__/ArcanistDiffParserTestCase.php @@ -93,6 +93,7 @@ final class ArcanistDiffParserTestCase extends ArcanistTestCase { %% %% %%% + EOCORPUS; $corpus_1 = <<assertEqual( $corpus_0, diff --git a/src/parser/__tests__/bundle.git.tgz b/src/parser/__tests__/bundle.git.tgz index c3a3fcac..41e85594 100644 Binary files a/src/parser/__tests__/bundle.git.tgz and b/src/parser/__tests__/bundle.git.tgz differ