#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Get data on Revisions and builds from Phabricator.

The raw Conduit responses are cached as JSON files in a ``tmp`` directory next
to this script; metrics are then computed from the cached data and written as
CSV files into the same directory or printed to stdout.
"""

import argparse
import collections
import csv
import datetime
import json
import os
import socket
import time
from typing import Any, Callable, Dict, List, Optional

import git
import phabricator

# PHIDs of build plans used for pre-merge testing
# FIXME: how do you get these?
_PRE_MERGE_PHIDs = ['PHID-HMCP-bfkbtacsszhg3feydpo6',  # beta testers
                    'PHID-HMCP-qbviyekvgirhhhvkzpsn',  # public pre-merge tests
                    'PHID-HMCP-p2oc4ocen3l2yzymvg2l',
                    ]

# query all data after this date
START_DATE = datetime.date(year=2019, month=10, day=1)


class PhabResponse:
    """Thin wrapper around one result dictionary returned by a Conduit query."""

    def __init__(self, revision_dict: Dict):
        self.revision_dict = revision_dict

    @property
    def id(self) -> str:
        return self.revision_dict['id']

    @property
    def phid(self) -> str:
        return self.revision_dict['phid']

    def __str__(self):
        return str(self.revision_dict)


class Revision(PhabResponse):
    """A differential revision plus the buildables and diffs linked to it."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Both lists are filled in by PhabBuildPuller.link_objects().
        self.buildables: List['Buildable'] = []
        self.diffs: List['Diff'] = []

    @property
    def status(self) -> str:
        return self.revision_dict['fields']['status']['value']

    @property
    def builds(self) -> List['Build']:
        """Return the builds of all buildables, in buildable order."""
        return [build
                for buildable in self.buildables
                for build in buildable.builds]

    @property
    def created_date(self):
        return self.revision_dict['fields']['dateCreated']

    @property
    def was_premerge_tested(self) -> bool:
        return any(b.was_premerge_tested for b in self.builds)

    @property
    def repository_phid(self) -> str:
        return self.revision_dict['fields']['repositoryPHID']

    @property
    def diff_phid(self) -> str:
        return self.revision_dict['fields']['diffPHID']

    @property
    def all_diffs_have_refs(self) -> bool:
        return all(d.has_refs for d in self.diffs)

    @property
    def day(self) -> datetime.date:
        return datetime.date.fromtimestamp(self.created_date)

    @property
    def week(self) -> str:
        """Return the ISO week of creation, formatted as ``<year>-w<week>``."""
        day = self.day
        return '{}-w{:02d}'.format(day.year, day.isocalendar()[1])

    @property
    def published(self) -> bool:
        return self.status == 'published'

    @property
    def build_status_list(self) -> List[bool]:
        """Return the pass/fail state of every pre-merge build, in order."""
        return [b.passed for b in self.builds if b.was_premerge_tested]

    @property
    def builds_finally_succeeded(self) -> bool:
        """Return true iff one of the builds failed and the last build passed."""
        # has_failed_builds implies a non-empty list, so [-1] is safe.
        return self.has_failed_builds and self.build_status_list[-1]

    @property
    def has_failed_builds(self) -> bool:
        """Return true iff one of the builds failed."""
        return False in self.build_status_list

    @property
    def published_failing(self) -> bool:
        """Return true iff published and the last build failed."""
        # was_premerge_tested implies a non-empty list, so [-1] is safe.
        return (self.was_premerge_tested and self.published
                and not self.build_status_list[-1])

    @property
    def all_builds_passed(self) -> bool:
        return self.was_premerge_tested and all(self.build_status_list)

    @property
    def all_builds_failed(self) -> bool:
        return self.was_premerge_tested and not any(self.build_status_list)

    @property
    def dateModified(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(
            self.revision_dict['fields']['dateModified'])


class Buildable(PhabResponse):
    """A Harbormaster buildable, linking a revision to its builds."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Both attributes are filled in by PhabBuildPuller.link_objects().
        self.builds: List['Build'] = []
        self.revision: Optional[Revision] = None

    @property
    def diff_phid(self) -> str:
        return self.revision_dict['buildablePHID']

    @property
    def revison_phid(self) -> str:
        return self.revision_dict['containerPHID']


class Build(PhabResponse):
    """A single Harbormaster build."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Filled in by PhabBuildPuller.link_objects().
        self.buildable: Optional[Buildable] = None

    @property
    def buildable_phid(self) -> str:
        return self.revision_dict['fields']['buildablePHID']

    @property
    def buildplan_phid(self) -> str:
        return self.revision_dict['fields']['buildPlanPHID']

    @property
    def was_premerge_tested(self) -> bool:
        """Return true iff the build used one of the pre-merge build plans."""
        return self.buildplan_phid in _PRE_MERGE_PHIDs

    @property
    def passed(self) -> bool:
        """Returns true, if the build "passed".

        Builds of plans other than the pre-merge plans never count as passed.
        """
        return (self.was_premerge_tested
                and self.revision_dict['fields']['buildStatus']['value'] == 'passed')

    @property
    def dateModified(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(
            self.revision_dict['fields']['dateModified'])


class Diff(PhabResponse):
    """A differential diff, including the git refs it was created from."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Filled in by PhabBuildPuller.link_objects().
        self.revision: Optional[Revision] = None

    @property
    def revison_phid(self) -> str:
        return self.revision_dict['fields']['revisionPHID']

    @property
    def _refs(self) -> List:
        return self.revision_dict['fields']['refs']

    @property
    def has_refs(self) -> bool:
        return len(self._refs) > 0

    @property
    def base_revision(self) -> Optional[str]:
        """Return the identifier of the first 'base' ref, or None if absent."""
        for ref in self._refs:
            if ref['type'] == 'base':
                return ref['identifier']
        return None

    @property
    def base_branch(self) -> Optional[str]:
        """Return the name of the first 'branch' ref, or None if absent."""
        for ref in self._refs:
            if ref['type'] == 'branch':
                return ref['name']
        return None

    @property
    def dateCreated(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(
            self.revision_dict['fields']['dateCreated'])


class PhabBuildPuller:
    """Download revisions, buildables, builds and diffs and compute metrics."""

    # files/folder for storing temporary results
    _TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'tmp'))
    _REVISION_FILE = os.path.join(_TMP_DIR, 'phab-revisions.json')
    _BUILDABLE_FILE = os.path.join(_TMP_DIR, 'phab-buildables.json')
    _BUILD_FILE = os.path.join(_TMP_DIR, 'phab-build.json')
    _DIFF_FILE = os.path.join(_TMP_DIR, 'phab-diffs.json')
    _PHAB_WEEKLY_METRICS_FILE = os.path.join(_TMP_DIR, 'phabricator_{}.csv')

    # A query that times out is retried this many times before giving up,
    # waiting _RETRY_DELAY_SECONDS between attempts.
    _MAX_RETRIES = 5
    _RETRY_DELAY_SECONDS = 10

    def __init__(self, repo_path: str):
        """Create the Conduit client.

        Args:
            repo_path: path of the local git checkout that base revisions of
                diffs are matched against.
        """
        # NOTE(review): token and host are still None when the client is
        # created; presumably the phabricator library then falls back to its
        # own ~/.arcrc handling -- confirm.
        self.conduit_token = None
        self.host = None
        self.phab = self._create_phab()
        self._repo_path: str = repo_path
        # All dictionaries are keyed by the PHID of the object.
        self.revisions: Dict[str, Revision] = {}
        self.buildables: Dict[str, Buildable] = {}
        self.builds: Dict[str, Build] = {}
        self.diffs: Dict[str, Diff] = {}

    def _create_phab(self) -> phabricator.Phabricator:
        phab = phabricator.Phabricator(token=self.conduit_token, host=self.host)
        phab.update_interfaces()
        return phab

    def _load_arcrc(self):
        """Load arc configuration from file if not set.

        Nothing in this file calls this method.
        """
        if self.conduit_token is not None or self.host is not None:
            return
        print('Loading configuration from ~/.arcrc file')
        with open(os.path.expanduser('~/.arcrc'), 'r') as arcrc_file:
            arcrc = json.load(arcrc_file)
        # use the first host configured in the file
        self.host = next(iter(arcrc['hosts']))
        self.conduit_token = arcrc['hosts'][self.host]['token']

    def run(self):
        """Download whatever is not cached yet, then compute all metrics.

        Every download step is skipped if its cache file already exists.
        """
        os.makedirs(self._TMP_DIR, exist_ok=True)
        # Order matters: each download uses the PHIDs parsed in earlier steps.
        if not os.path.isfile(self._REVISION_FILE):
            self.get_revisions()
        self.parse_revisions()
        if not os.path.isfile(self._BUILDABLE_FILE):
            self.get_buildables()
        self.parse_buildables()
        if not os.path.isfile(self._BUILD_FILE):
            self.get_builds()
        self.parse_builds()
        if not os.path.isfile(self._DIFF_FILE):
            self.get_diffs()
        self.parse_diffs()
        self.link_objects()
        self.compute_metrics('day', lambda r: r.day)
        self.compute_metrics('week', lambda r: r.week)
        self.count_base_revisions()
        self.revision_statistics()
        self.match_base_revisions_with_repo(self._repo_path)

    @classmethod
    def _call_with_retry(cls, method: Callable, **kwargs):
        """Call a Conduit method, retrying when the socket times out.

        Raises:
            socket.timeout: if the call still times out after _MAX_RETRIES
                retries.
        """
        fail_count = 0
        while True:
            try:
                return method(**kwargs)
            except socket.timeout:
                fail_count += 1
                if fail_count > cls._MAX_RETRIES:
                    raise
                time.sleep(cls._RETRY_DELAY_SECONDS)

    @classmethod
    def _fetch_all(cls, method: Callable, label: str, **kwargs) -> List[Dict]:
        """Download all pages of a cursor-paginated Conduit query.

        Args:
            method: the Conduit method to call.
            label: plural name of the downloaded objects, used for progress
                output only.
            **kwargs: arguments passed to every call, in addition to `after`.

        Returns:
            The concatenated 'data' entries of all pages.
        """
        data = []
        after = None
        while True:
            response = cls._call_with_retry(
                method, after=after, **kwargs).response
            data.extend(response['data'])
            print('{} {}...'.format(len(data), label))
            after = response['cursor']['after']
            # The last page is marked by an empty cursor.
            if after is None:
                return data

    @staticmethod
    def _save_json(path: str, data) -> None:
        """Write `data` as JSON to `path`, replacing any existing file."""
        with open(path, 'w') as json_file:
            json.dump(data, json_file)

    @staticmethod
    def _load_json(path: str):
        """Return the parsed content of the JSON file at `path`."""
        with open(path) as json_file:
            return json.load(json_file)

    def get_revisions(self):
        """Download all revisions created since START_DATE into the cache."""
        print('Downloading revisions starting...')
        # strftime('%s') is a platform-specific extension; mktime yields the
        # same local-time epoch value on every platform.
        from_date = int(time.mktime(START_DATE.timetuple()))
        constraints = {
            'createdStart': from_date
        }
        data = self._fetch_all(self.phab.differential.revision.search,
                               'revisions', constraints=constraints)
        print('Number of revisions:', len(data))
        self._save_json(self._REVISION_FILE, data)

    def get_buildables(self):
        """Download the buildables of all known revisions into the cache."""
        print('Downloading buildables...')
        data = self._fetch_all(
            self.phab.harbormaster.querybuildables, 'buildables',
            containerPHIDs=[r.phid for r in self.revisions.values()])
        print('Number of buildables:', len(data))
        self._save_json(self._BUILDABLE_FILE, data)

    def get_builds(self):
        """Download the builds of all known buildables into the cache."""
        print('Downloading builds...')
        constraints = {
            'buildables': [r.phid for r in self.buildables.values()]
        }
        data = self._fetch_all(self.phab.harbormaster.build.search,
                               'builds', constraints=constraints)
        print('Number of builds:', len(data))
        self._save_json(self._BUILD_FILE, data)

    def get_diffs(self):
        """Download the diffs of all known revisions into the cache."""
        print('Downloading diffs...')
        constraints = {
            'revisionPHIDs': [r.phid for r in self.revisions.values()]
        }
        data = self._fetch_all(self.phab.differential.diff.search,
                               'diffs', constraints=constraints)
        print('Number of diffs:', len(data))
        self._save_json(self._DIFF_FILE, data)

    def parse_revisions(self):
        """Load the cached revisions into `self.revisions`."""
        parsed = (Revision(x) for x in self._load_json(self._REVISION_FILE))
        self.revisions = {r.phid: r for r in parsed}
        print('Parsed {} revisions.'.format(len(self.revisions)))

    def parse_buildables(self):
        """Load the cached buildables into `self.buildables`."""
        parsed = (Buildable(x) for x in self._load_json(self._BUILDABLE_FILE))
        self.buildables = {b.phid: b for b in parsed}
        print('Parsed {} buildables.'.format(len(self.buildables)))

    def parse_builds(self):
        """Load the cached builds into `self.builds`."""
        parsed = (Build(x) for x in self._load_json(self._BUILD_FILE))
        self.builds = {b.phid: b for b in parsed}
        print('Parsed {} builds.'.format(len(self.builds)))

    def parse_diffs(self):
        """Load the cached diffs into `self.diffs`."""
        parsed = (Diff(x) for x in self._load_json(self._DIFF_FILE))
        self.diffs = {d.phid: d for d in parsed}
        print('Parsed {} diffs.'.format(len(self.diffs)))

    def link_objects(self):
        """Connect builds, buildables, diffs and revisions with each other.

        Raises:
            KeyError: if an object refers to a PHID that was not parsed.
        """
        for build in self.builds.values():
            buildable = self.buildables[build.buildable_phid]
            build.buildable = buildable
            buildable.builds.append(build)

        for buildable in self.buildables.values():
            revision = self.revisions[buildable.revison_phid]
            revision.buildables.append(buildable)
            buildable.revision = revision

        for diff in self.diffs.values():
            revision = self.revisions[diff.revison_phid]
            revision.diffs.append(diff)
            diff.revision = revision

    def compute_metrics(self, name: str,
                        group_function: Callable[[Revision], Any]):
        """Write per-group build metrics of all revisions to a CSV file.

        Args:
            name: name of the grouping; used as the header of the first column
                and as part of the output file name.
            group_function: maps a revision to the key of its group. The keys
                must be sortable.
        """
        print('Creating metrics for {}...'.format(name))
        group_dict = collections.defaultdict(list)
        for revision in self.revisions.values():
            group_dict[group_function(revision)].append(revision)

        fieldnames = [name, '# revisions', '# tested revisions',
                      '% tested revisions', '# untested revisions',
                      '# revisions without builds',
                      '% revisions without builds', '# no repository set',
                      '# had failed builds', '% had failed builds',
                      '# failed first then passed',
                      '% failed first then passed',
                      '# published failing', '% published failing',
                      '# all passed', '% all passed']

        # newline='' lets the csv module control the line endings itself.
        with open(self._PHAB_WEEKLY_METRICS_FILE.format(name), 'w',
                  newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames,
                                    dialect=csv.excel)
            writer.writeheader()
            for group in sorted(group_dict):
                revisions = group_dict[group]  # type: List[Revision]
                # Every group holds at least one revision, so the divisions
                # below cannot fail.
                num_revisions = len(revisions)

                def percent(count: int) -> float:
                    return 100.0 * count / num_revisions

                num_tested = sum(
                    1 for r in revisions if r.was_premerge_tested)
                num_no_build_triggered = sum(
                    1 for r in revisions if not r.builds)
                num_no_repo = sum(
                    1 for r in revisions if r.repository_phid is None)
                num_had_failed_builds = sum(
                    1 for r in revisions if r.has_failed_builds)
                num_failed_first_then_passed = sum(
                    1 for r in revisions if r.builds_finally_succeeded)
                num_published_failing = sum(
                    1 for r in revisions if r.published_failing)
                num_all_passed = sum(
                    1 for r in revisions if r.all_builds_passed)
                writer.writerow({
                    name: group,
                    '# revisions': num_revisions,
                    '# tested revisions': num_tested,
                    '% tested revisions': percent(num_tested),
                    '# untested revisions': num_revisions - num_tested,
                    '# revisions without builds': num_no_build_triggered,
                    '% revisions without builds':
                        percent(num_no_build_triggered),
                    '# no repository set': num_no_repo,
                    '# had failed builds': num_had_failed_builds,
                    '% had failed builds': percent(num_had_failed_builds),
                    '# failed first then passed':
                        num_failed_first_then_passed,
                    '% failed first then passed':
                        percent(num_failed_first_then_passed),
                    '# published failing': num_published_failing,
                    '% published failing': percent(num_published_failing),
                    '# all passed': num_all_passed,
                    '% all passed': percent(num_all_passed),
                })

    def count_base_revisions(self):
        """Print the most frequently used base revisions and base branches.

        Diffs without a base revision or branch are counted under None.
        """
        diffs = self.diffs.values()
        base_revisions = collections.Counter(d.base_revision for d in diffs)
        base_branches = collections.Counter(d.base_branch for d in diffs)

        print(f'{len(self.diffs)} diffs are using {len(base_revisions)} different git base revisions.')
        print('The top 10 revisions and their usages are:')
        for commit, count in base_revisions.most_common(10):
            print(f' commit {commit} was used {count} times')
        print()
        print(f'{len(self.diffs)} diffs are using {len(base_branches)} different git base branches')
        print('The top 10 branches and their usages are:')
        for branch, count in base_branches.most_common(10):
            print(f' branch {branch} was used {count} times')
        print()

    def match_base_revisions_with_repo(self, repo_path: str):
        """Print how many base revisions of diffs are usable in a git repo.

        A base revision counts as unusable if it is not found in the
        repository or if its commit date is later than the creation date of
        the diff.

        Args:
            repo_path: path of the local git repository to look commits up in.
        """
        repo = git.Repo(repo_path)
        not_found = 0
        invalid_date = 0
        has_base_revision = 0
        for diff in self.diffs.values():
            revision = diff.base_revision
            if revision is None:
                continue
            has_base_revision += 1
            try:
                commit = repo.commit(revision)
            except (ValueError, git.BadName):
                not_found += 1
                continue
            commited_date = datetime.datetime.fromtimestamp(
                commit.committed_date)
            if commited_date > diff.dateCreated:
                invalid_date += 1

        # Without any base revision there is nothing to divide by.
        if has_base_revision:
            unusable_percent = (not_found + invalid_date) / has_base_revision * 100
        else:
            unusable_percent = 0.0

        print()
        print(f'Of the {has_base_revision} Diffs with base revision, the base revision was NOT found in the repo for {not_found} and ')
        print(f'{invalid_date} base revisions were used before being available upstream.')
        print(f'So {unusable_percent:0.2f} % of specified the base revisions were unusable.')

    def revision_statistics(self):
        """Print how many revisions had no, failing or only passing builds."""
        no_builds = 0
        has_failed = 0
        fail_then_pass = 0
        all_passed = 0
        fail_last = 0
        for revision in self.revisions.values():
            builds = revision.builds
            if not builds:
                no_builds += 1
                continue
            # Build.passed is False for builds of plans other than the
            # pre-merge plans, so those count as failed here.
            build_status = [b.passed for b in builds]
            last_passed = build_status[-1]
            if False in build_status:
                has_failed += 1
                if last_passed:
                    fail_then_pass += 1
            else:
                all_passed += 1
            if revision.published and not last_passed:
                fail_last += 1

        print()
        print(f'Out of the {len(self.revisions)} Revisions:')
        print(f' {no_builds} had no builds.')
        print(f' {has_failed} had failed builds.')
        print(f' {fail_then_pass} had failed builds, but the last build passed.')
        print(f' {all_passed} had only successful builds.')
        print(f' {fail_last} were published with a failing build.')


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('repo_path')
    args = parser.parse_args()
    puller = PhabBuildPuller(args.repo_path)
    puller.run()