1
0
Fork 0
llvm-premerge-checks/scripts/metrics/pull_phab_build_stats.py
2020-08-13 13:14:54 +02:00

541 lines
20 KiB
Python
Executable file

#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Get data on Revisions and builds from Phabricator
import phabricator
import json
import os
import datetime
from typing import Dict, List, Optional
import csv
import time
import socket
import git
import argparse
# PHIDs of the Harbormaster build plans used for pre-merge testing.
# Builds from any other plan are ignored by the statistics below.
# FIXME: how do you get these?
_PRE_MERGE_PHIDs = ['PHID-HMCP-bfkbtacsszhg3feydpo6', # beta testers
                    'PHID-HMCP-qbviyekvgirhhhvkzpsn', # public pre-merge tests
                    'PHID-HMCP-p2oc4ocen3l2yzymvg2l', # purpose not documented -- TODO: identify this plan
                    ]
# query all data after this date
START_DATE = datetime.date(year=2019, month=10, day=1)
class PhabResponse:
    """Thin wrapper around one raw dictionary returned by the Phabricator API."""

    def __init__(self, revision_dict: Dict):
        # Raw API payload; subclasses read their fields straight out of it.
        self.revision_dict = revision_dict

    @property
    def id(self) -> str:
        """Numeric object ID as reported by Phabricator."""
        return self.revision_dict['id']

    @property
    def phid(self) -> str:
        """Globally unique Phabricator ID (PHID) of this object."""
        return self.revision_dict['phid']

    def __str__(self):
        return str(self.revision_dict)
class Revision(PhabResponse):
    """A Differential revision together with its linked buildables and diffs."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Both lists are populated later by PhabBuildPuller.link_objects().
        self.buildables = []  # type: List['Buildable']
        self.diffs = []  # type: List['Diff']

    @property
    def status(self) -> str:
        return self.revision_dict['fields']['status']['value']

    @property
    def builds(self) -> List['Build']:
        """All builds of all buildables of this revision, in buildable order."""
        return [build for buildable in self.buildables for build in buildable.builds]

    @property
    def created_date(self):
        """Creation time as a Unix timestamp."""
        return self.revision_dict['fields']['dateCreated']

    @property
    def was_premerge_tested(self) -> bool:
        """True if at least one build ran on a pre-merge build plan."""
        return any(build.was_premerge_tested for build in self.builds)

    @property
    def repository_phid(self) -> str:
        return self.revision_dict['fields']['repositoryPHID']

    @property
    def diff_phid(self) -> str:
        return self.revision_dict['fields']['diffPHID']

    @property
    def all_diffs_have_refs(self) -> bool:
        """True if every attached diff carries git ref information."""
        return all(diff.has_refs for diff in self.diffs)

    @property
    def day(self) -> datetime.date:
        return datetime.date.fromtimestamp(self.created_date)

    @property
    def week(self) -> str:
        """'<year>-w<week>' bucket of the creation date.

        NOTE(review): combines the calendar year with the ISO week number,
        which can mislabel days around New Year -- kept as-is.
        """
        created = self.day
        return'{}-w{:02d}'.format(created.year, created.isocalendar()[1])

    @property
    def published(self) -> bool:
        return self.status == 'published'

    @property
    def build_status_list(self) -> List[bool]:
        """Pass/fail flags of the pre-merge-tested builds, in build order."""
        return [build.passed for build in self.builds if build.was_premerge_tested]

    @property
    def builds_finally_succeeded(self) -> bool:
        """Return true iff one of the builds failed and the last build passed."""
        return self.has_failed_builds and self.build_status_list[-1]

    @property
    def has_failed_builds(self) -> bool:
        """Return true iff one of the builds failed."""
        return False in self.build_status_list

    @property
    def published_failing(self) -> bool:
        """Return true iff published and the last build failed."""
        return self.was_premerge_tested and self.published \
            and not self.build_status_list[-1]

    @property
    def all_builds_passed(self) -> bool:
        return self.was_premerge_tested and all(self.build_status_list)

    @property
    def all_builds_failed(self) -> bool:
        # De Morgan: "all failed" == "none passed".
        return self.was_premerge_tested and not any(self.build_status_list)

    @property
    def dateModified(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(self.revision_dict['fields']['dateModified'])
class Buildable(PhabResponse):
    """A Harbormaster buildable: ties one diff of a revision to its builds."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Both links are filled in by PhabBuildPuller.link_objects().
        self.builds = []  # type: List[Build]
        self.revision = None  # type: Optional[Revision]

    @property
    def diff_phid(self) -> str:
        """PHID of the diff this buildable was created for."""
        return self.revision_dict['buildablePHID']

    @property
    def revison_phid(self) -> str:
        """PHID of the containing revision (name keeps a historic typo; callers rely on it)."""
        return self.revision_dict['containerPHID']
class Build(PhabResponse):
    """One Harbormaster build executed for a buildable."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Back-link, set by PhabBuildPuller.link_objects().
        self.buildable = None  # type: Optional[Buildable]

    @property
    def buildable_phid(self) -> str:
        return self.revision_dict['fields']['buildablePHID']

    @property
    def buildplan_phid(self) -> str:
        return self.revision_dict['fields']['buildPlanPHID']

    @property
    def was_premerge_tested(self) -> bool:
        """True if this build ran on one of the pre-merge build plans."""
        return self.buildplan_phid in _PRE_MERGE_PHIDs

    @property
    def passed(self) -> bool:
        """Returns true, if the build "passed" """
        # Builds from non-pre-merge plans never count as passed here.
        return self.was_premerge_tested and self.revision_dict['fields']['buildStatus']['value'] == 'passed'

    @property
    def dateModified(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(self.revision_dict['fields']['dateModified'])
class Diff(PhabResponse):
    """A Differential diff; carries the git refs (base commit, branch) it was uploaded against."""

    def __init__(self, revision_dict):
        super().__init__(revision_dict)
        # Back-link, set by PhabBuildPuller.link_objects().
        self.revision = None  # type: Optional[Revision]

    @property
    def revison_phid(self) -> str:
        """PHID of the owning revision (name keeps a historic typo; callers rely on it)."""
        return self.revision_dict['fields']['revisionPHID']

    @property
    def _refs(self) -> List:
        return self.revision_dict['fields']['refs']

    @property
    def has_refs(self) -> bool:
        return len(self._refs) > 0

    @property
    def base_revision(self) -> Optional[str]:
        """Git commit hash the diff is based on, or None if not recorded.

        Fixed: was annotated ``-> str`` although the fall-through returns None.
        """
        for ref in self._refs:
            if ref['type'] == 'base':
                return ref['identifier']
        return None

    @property
    def base_branch(self) -> Optional[str]:
        """Git branch the diff was made against, or None if not recorded.

        Fixed: was annotated ``-> str`` although the fall-through returns None.
        """
        for ref in self._refs:
            if ref['type'] == 'branch':
                return ref['name']
        return None

    @property
    def dateCreated(self) -> datetime.datetime:
        return datetime.datetime.fromtimestamp(self.revision_dict['fields']['dateCreated'])
class PhabBuildPuller:
    """Downloads revisions, buildables, builds and diffs from Phabricator,
    caches them as JSON on disk and derives pre-merge-testing statistics."""

    # files/folder for storing temporary results
    _TMP_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'tmp'))
    _REVISION_FILE = os.path.join(_TMP_DIR, 'phab-revisions.json')
    _BUILDABLE_FILE = os.path.join(_TMP_DIR, 'phab-buildables.json')
    _BUILD_FILE = os.path.join(_TMP_DIR, 'phab-build.json')
    _DIFF_FILE = os.path.join(_TMP_DIR, 'phab-diffs.json')
    # One CSV per grouping; '{}' is filled with the group name ('day', 'week').
    _PHAB_WEEKLY_METRICS_FILE = os.path.join(_TMP_DIR, 'phabricator_{}.csv')

    def __init__(self, repo_path: str):
        # NOTE(review): both stay None -- _load_arcrc() is never called, so
        # the client below is created without explicit credentials; confirm
        # that phabricator.Phabricator picks up configuration on its own.
        self.conduit_token = None  # type: Optional[str]
        self.host = None  # type: Optional[str]
        self.phab = self._create_phab()
        # Local git checkout used to cross-check diff base revisions.
        self._repo_path = repo_path  # type: str
        # PHID -> object maps, filled by the parse_* methods.
        self.revisions = {}  # type: Dict[str, Revision]
        self.buildables = {}  # type: Dict[str, Buildable]
        self.builds = {}  # type: Dict[str, Build]
        self.diffs = {}  # type: Dict[str, Diff]
def _create_phab(self) -> phabricator.Phabricator:
    """Create and prime the Conduit API client from self.conduit_token/self.host."""
    phab = phabricator.Phabricator(token=self.conduit_token, host=self.host)
    # Fetch the server's method signatures so phab.<app>.<method> calls work.
    phab.update_interfaces()
    return phab

def _load_arcrc(self):
    """Load arc configuration from file if not set."""
    # NOTE(review): this helper is never invoked anywhere in this file --
    # confirm whether __init__ was meant to call it before _create_phab().
    if self.conduit_token is not None or self.host is not None:
        return
    print('Loading configuration from ~/.arcrc file')
    with open(os.path.expanduser('~/.arcrc'), 'r') as arcrc_file:
        arcrc = json.load(arcrc_file)
    # use the first host configured in the file
    self.host = next(iter(arcrc['hosts']))
    self.conduit_token = arcrc['hosts'][self.host]['token']
def run(self):
    """Main entry point: download (or reuse cached) data, then print/write metrics.

    Each download step is skipped when its JSON cache file already exists;
    delete the files under tmp/ to force a fresh download. The steps are
    order-dependent: buildables are queried by revision PHIDs, builds by
    buildable PHIDs, and diffs by revision PHIDs.
    """
    if not os.path.exists(self._TMP_DIR):
        os.mkdir(self._TMP_DIR)
    if not os.path.isfile(self._REVISION_FILE):
        self.get_revisions()
    self.parse_revisions()
    if not os.path.isfile(self._BUILDABLE_FILE):
        self.get_buildables()
    self.parse_buildables()
    if not os.path.isfile(self._BUILD_FILE):
        self.get_builds()
    self.parse_builds()
    if not os.path.isfile(self._DIFF_FILE):
        self.get_diffs()
    self.parse_diffs()
    # Wire up cross-references before computing any statistics.
    self.link_objects()
    self.compute_metrics('day', lambda r: r.day)
    self.compute_metrics('week', lambda r: r.week)
    self.count_base_revisions()
    self.revision_statistics()
    self.match_base_revisions_with_repo(self._repo_path)
def get_revisions(self):
    """Download all revisions created after START_DATE and cache them as JSON."""
    print('Downloading revisions starting...')
    # Fixed: date.strftime('%s') is a non-portable glibc extension;
    # time.mktime gives the same local-midnight Unix timestamp everywhere.
    from_date = int(time.mktime(START_DATE.timetuple()))
    data = []
    constraints = {
        'createdStart': from_date
    }
    # FIXME: lots of code duplication around pagination and error handling.
    # find a way to separate this into a function.
    after = None
    while True:
        revisions = self.phab.differential.revision.search(
            constraints=constraints, after=after)
        data.extend(revisions.response['data'])
        print('{} revisions...'.format(len(data)))
        # Conduit returns a cursor; None means the last page was reached.
        after = revisions.response['cursor']['after']
        if after is None:
            break
    print('Number of revisions:', len(data))
    with open(self._REVISION_FILE, 'w') as json_file:
        json.dump(data, json_file)
def get_buildables(self):
    """Download all Harbormaster buildables for the known revisions and cache them as JSON."""
    print('Downloading buildables...')
    # The revision set does not change while paging, so build the PHID list once.
    container_phids = [r.phid for r in self.revisions.values()]
    results = []
    cursor = None
    while True:
        response = self.phab.harbormaster.querybuildables(
            containerPHIDs=container_phids, after=cursor).response
        results.extend(response['data'])
        print('{} buildables...'.format(len(results)))
        # A cursor of None signals the last page.
        cursor = response['cursor']['after']
        if cursor is None:
            break
    print('Number of buildables:', len(results))
    with open(self._BUILDABLE_FILE, 'w') as json_file:
        json.dump(results, json_file)
def get_builds(self):
    """Download all Harbormaster builds for the known buildables and cache them as JSON."""
    print('Downloading builds...')
    data = []
    constraints = {
        'buildables': [r.phid for r in self.buildables.values()]
    }
    after = None
    while True:
        # retry on timeouts, giving up after 5 consecutive failures
        fail_count = 0
        while True:
            try:
                revisions = self.phab.harbormaster.build.search(
                    constraints=constraints, after=after)
            except socket.timeout:
                fail_count += 1
                if fail_count > 5:
                    raise
                time.sleep(10)
                continue
            break
        data.extend(revisions.response['data'])
        print('{} builds...'.format(len(data)))
        after = revisions.response['cursor']['after']
        if after is None:
            break
    # Fixed: the summary previously said "Number of buildables:" here.
    print('Number of builds:', len(data))
    with open(self._BUILD_FILE, 'w') as json_file:
        json.dump(data, json_file)
def get_diffs(self):
    """Download all diffs belonging to the known revisions and cache them as JSON."""
    print('Downloading diffs...')
    results = []
    constraints = {
        'revisionPHIDs': [r.phid for r in self.revisions.values()]
    }
    cursor = None
    while True:
        # retry on timeouts; give up after 5 consecutive failures
        attempts = 0
        while True:
            try:
                diffs = self.phab.differential.diff.search(
                    constraints=constraints, after=cursor)
            except socket.timeout:
                attempts += 1
                if attempts > 5:
                    raise
                time.sleep(10)
            else:
                break
        results.extend(diffs.response['data'])
        print('{} diffs...'.format(len(results)))
        # A cursor of None signals the last page.
        cursor = diffs.response['cursor']['after']
        if cursor is None:
            break
    print('Number of diffs:', len(results))
    with open(self._DIFF_FILE, 'w') as json_file:
        json.dump(results, json_file)
def _parse_cached(self, file_name: str, factory):
    """Load one cached JSON file and wrap every entry with `factory`, keyed by PHID."""
    with open(file_name) as json_file:
        entries = json.load(json_file)
    return {obj.phid: obj for obj in map(factory, entries)}

def parse_revisions(self):
    """Populate self.revisions from the cached revision file."""
    self.revisions = self._parse_cached(self._REVISION_FILE, Revision)
    print('Parsed {} revisions.'.format(len(self.revisions)))

def parse_buildables(self):
    """Populate self.buildables from the cached buildable file."""
    self.buildables = self._parse_cached(self._BUILDABLE_FILE, Buildable)
    print('Parsed {} buildables.'.format(len(self.buildables)))

def parse_builds(self):
    """Populate self.builds from the cached build file."""
    self.builds = self._parse_cached(self._BUILD_FILE, Build)
    print('Parsed {} builds.'.format(len(self.builds)))

def parse_diffs(self):
    """Populate self.diffs from the cached diff file."""
    self.diffs = self._parse_cached(self._DIFF_FILE, Diff)
    print('Parsed {} diffs.'.format(len(self.diffs)))
def link_objects(self):
for build in (b for b in self.builds.values()):
buildable = self.buildables[build.buildable_phid]
build.buildable = buildable
buildable.builds.append(build)
for buildable in self.buildables.values():
revision = self.revisions[buildable.revison_phid]
revision.buildables.append(buildable)
buildable.revision = revision
for diff in self.diffs.values():
revision = self.revisions[diff.revison_phid]
revision.diffs.append(diff)
diff.revision = revision
def compute_metrics(self, name: str, group_function):
print('Creating metrics for {}...'.format(name))
group_dict = {}
for revision in self.revisions.values():
group_dict.setdefault(group_function(revision), []).append(revision)
csv_file = open(self._PHAB_WEEKLY_METRICS_FILE.format(name), 'w')
fieldnames = [name, '# revisions', '# tested revisions', '% tested revisions', '# untested revisions',
'# revisions without builds', '% revisions without builds', '# no repository set',
'# had failed builds', '% had failed builds', '# failed first then passed',
'% failed first then passed', '# published failing', '% published failing',
'# all passed', '% all passed']
writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel)
writer.writeheader()
for group in sorted(group_dict.keys()):
revisions = group_dict[group] # type: List[Revision]
num_revisions = len(revisions)
num_premt_revisions = len([r for r in revisions if r.was_premerge_tested])
precentage_premt_revisions = 100.0 * num_premt_revisions / num_revisions
num_no_build_triggered = len([r for r in revisions if len(r.builds) == 0])
percent_no_build_triggered = 100.0 * num_no_build_triggered / num_revisions
num_no_repo = len([r for r in revisions if r.repository_phid is None])
num_had_failed_builds =len([r for r in revisions if r.has_failed_builds])
num_failed_first_then_passed = len([r for r in revisions if r.builds_finally_succeeded])
num_published_failing = len([r for r in revisions if r.published_failing])
num_all_passed = len([r for r in revisions if r.all_builds_passed])
writer.writerow({
name: group,
'# revisions': num_revisions,
'# tested revisions': num_premt_revisions,
'% tested revisions': precentage_premt_revisions,
'# untested revisions': num_revisions - num_premt_revisions,
'# revisions without builds': num_no_build_triggered,
'% revisions without builds': percent_no_build_triggered,
'# no repository set': num_no_repo,
'# had failed builds': num_had_failed_builds,
'% had failed builds': 100 * num_had_failed_builds / num_revisions,
'# failed first then passed': num_failed_first_then_passed,
'% failed first then passed': 100 * num_failed_first_then_passed / num_revisions,
'# published failing': num_published_failing,
'% published failing': 100 * num_published_failing / num_revisions,
'# all passed': num_all_passed,
'% all passed': 100*num_all_passed / num_revisions,
})
def count_base_revisions(self):
base_revisions = {}
base_branches = {}
for diff in self.diffs.values():
base_revisions.setdefault(diff.base_revision, 0)
base_revisions[diff.base_revision] += 1
base_branches.setdefault(diff.base_branch, 0)
base_branches[diff.base_branch] +=1
print(f'{len(self.diffs)} diffs are using {len(base_revisions)} different git base revisions.')
print('The top 10 revisions and their usages are:')
revisions = sorted( base_revisions.items(), key=lambda x: x[1] , reverse=True)
for i in revisions[:10]:
print(f' commit {i[0]} was used {i[1]} times')
print()
print(f'{len(self.diffs)} diffs are using {len(base_branches)} different git base branches')
branches = sorted( base_branches.items(), key=lambda x: x[1] , reverse=True)
print('The top 10 branches and their usages are:')
for i in branches[:10]:
print(f' branch {i[0]} was used {i[1]} times')
print()
def match_base_revisions_with_repo(self, repo_path: str):
    """Check the diffs' git base revisions against a local checkout.

    Reports how many base revisions are unknown to the repository and how
    many were used before they became available upstream.

    :param repo_path: path to a local git clone of the upstream repository.
    """
    repo = git.Repo(repo_path)
    not_found = 0
    invalid_date = 0
    has_base_revision = 0
    for diff in self.diffs.values():
        revision = diff.base_revision
        if revision is None:
            # Diff carries no base revision information at all.
            continue
        has_base_revision += 1
        try:
            commit = repo.commit(revision)
        except (ValueError, git.BadName):
            # Base revision does not exist in the upstream repository.
            not_found += 1
            continue
        committed_date = datetime.datetime.fromtimestamp(commit.committed_date)
        if committed_date > diff.dateCreated:
            invalid_date += 1
    print()
    # Fixed: guard against ZeroDivisionError when no diff has a base revision.
    if has_base_revision == 0:
        print('None of the diffs specified a base revision.')
        return
    print(f'Of the {has_base_revision} Diffs with base revision, the base revision was NOT found in the repo for {not_found} and ')
    print(f'{invalid_date} base revisions were used before being available upstream.')
    print(f'So {(not_found+invalid_date)/has_base_revision*100:0.2f} % of specified the base revisions were unusable.')
def revision_statistics(self):
no_builds = 0
has_failed = 0
fail_then_pass = 0
all_passed = 0
fail_last = 0
for revision in self.revisions.values():
build_status = [b.passed for b in revision.builds]
if len(revision.builds) == 0:
no_builds += 1
continue
if False in build_status:
has_failed += 1
if build_status[-1] == True:
fail_then_pass +=1
else:
all_passed += 1
if revision.published and build_status[-1] == False:
fail_last += 1
print()
print(f'Out of the {len(self.revisions)} Revisions:')
print(f' {no_builds} had no builds.')
print(f' {has_failed} had failed builds.')
print(f' {fail_then_pass} had failed builds, but the last build passed.')
print(f' {all_passed} had only successful builds.')
print(f' {fail_last} were published with a failing build.')
if __name__ == '__main__':
    # The only CLI argument is the path to a local git checkout of the
    # upstream repository; it is used by match_base_revisions_with_repo()
    # to verify the base revisions referenced by the downloaded diffs.
    parser = argparse.ArgumentParser()
    parser.add_argument('repo_path')
    args = parser.parse_args()
    puller = PhabBuildPuller(args.repo_path)
    puller.run()