From 7a72b0b4f9b96b005b4a87cec870d33af3b18f09 Mon Sep 17 00:00:00 2001 From: epriestley Date: Wed, 10 Aug 2011 11:57:07 -0700 Subject: [PATCH] Be slightly less dumb about detecting "binary" files Summary: A better definition of "binary" is "not UTF-8", instead of "has some characters not in this arbitrary regexp". Principally, this makes files with Windows (CRLF) newlines not autodetect as binary. This might fix some of the issues in T365. Test Plan: @egillth applied this patch and verified that Diffusion now shows file content instead of detecting everything as binary in his repo full of Windows newlines. Reviewed By: jungejason Reviewers: egillth, tuomaspelkonen, jungejason, aran CC: aran, jungejason Differential Revision: 799 --- src/parser/diff/ArcanistDiffParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser/diff/ArcanistDiffParser.php b/src/parser/diff/ArcanistDiffParser.php index f7fece8f..28a7dd83 100644 --- a/src/parser/diff/ArcanistDiffParser.php +++ b/src/parser/diff/ArcanistDiffParser.php @@ -751,7 +751,7 @@ class ArcanistDiffParser { $is_binary = false; if ($this->detectBinaryFiles) { - $is_binary = preg_match('/([^\x09\x0A\x20-\x7E]+)/', $corpus); + $is_binary = !phutil_is_utf8($corpus); } if ($is_binary) {