From 1d9dbf55689a067a27793bdf20a29f8eb7788ff7 Mon Sep 17 00:00:00 2001 From: Anders Waldenborg Date: Mon, 21 Jun 2021 19:22:18 +0200 Subject: [PATCH] make clang_tidy_report handle diffs of text files with invalid utf-8 "git diff" treats text files whose encoding is not valid UTF-8 (e.g. ISO-8859-1) as text files and produces a diff of them (rather than saying "Binary files a/x and b/x differ"). This means that the diff output may contain byte sequences that are not valid UTF-8. Such files would cause clang_tidy_report.py to hit a UnicodeDecodeError when reading the diff, even if the invalid bytes were only on removed lines, and regardless of whether the file was listed in the ignore file. By specifying the error mode "replace" for the decode() method, bytes that are not valid UTF-8 are replaced with the Unicode replacement character (U+FFFD). When parsing the diff, clang-tidy-diff only looks at the filenames and line numbers in the diff, so it shouldn't be a problem that it doesn't get the exact same byte sequence inside the actual change. --- scripts/clang_tidy_report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/clang_tidy_report.py b/scripts/clang_tidy_report.py index 941e024..f8f6518 100755 --- a/scripts/clang_tidy_report.py +++ b/scripts/clang_tidy_report.py @@ -34,7 +34,7 @@ def run(base_commit, ignore_config, step: Optional[Step], report: Optional[Repor step = Step() # For debugging. r = subprocess.run(f'git diff -U0 --no-prefix {base_commit}', shell=True, capture_output=True) logging.debug(f'git diff {r}') - diff = r.stdout.decode() + diff = r.stdout.decode("utf-8", "replace") if ignore_config is not None and os.path.exists(ignore_config): ignore = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, open(ignore_config, 'r').readlines())