From ad0ad7ccc558543ca9c16ee7fef4cd9ca78f8fc7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Christian=20K=C3=BChnel?=
Date: Mon, 17 Feb 2020 13:10:52 +0100
Subject: [PATCH] counting failed builds on buildbots storing results on Stackdriver

---
 scripts/metrics/README.md    |  16 +++-
 scripts/metrics/buildbots.py | 187 +++++++++++++++++++++++++++++++++++
 scripts/requirements.txt     |   3 +-
 3 files changed, 201 insertions(+), 5 deletions(-)
 create mode 100755 scripts/metrics/buildbots.py

diff --git a/scripts/metrics/README.md b/scripts/metrics/README.md
index 49baf28..b9e930f 100644
--- a/scripts/metrics/README.md
+++ b/scripts/metrics/README.md
@@ -5,15 +5,16 @@ a set of metrics. This doc will summarize the metrics and tools. All of the data
 shall be collected as time series, so that we can see changes over time.
 
 * Impact - The metrics we ultimately want to improve
-  * Percentage of [build-bot build](http://lab.llvm.org:8011/) on master failing.
+  * Percentage of [build-bot build](http://lab.llvm.org:8011/) on master
+    failing. (Buildbot_percentage_failing)
   * Time to fix a broken master build: Time between start of failing builds
-    until the build is fixed.
+    until the build is fixed. (BuildBot_time_to_fix)
   * Percentage of Revisions on Phabricator where a broken build was fixed
     afterwards. This would indicate that a bug was found and fixed during
-    the code review phase.
+    the code review phase. (Premerge_fixes)
   * Number of reverts on master. This indicates that something was broken on
     master that slipped through the pre-merge tests or was submitted without
-    any review.
+    any review. (Upstream_reverts)
 
 * Users and behavior - Interesting to see and useful to adapt our approach.
   * Percentage of commits to master that went through Phabricator.
@@ -45,6 +46,13 @@ shall be collected as time series, so that we can see changes over time.
   * Send out alerts/notifications.
   * Show live data in charts.
 
+
+# Data sources
+
+This section will explain where we can get the data from.
+
+* build bot statistics
+
 # Solution
 
 We need to find solutions for these parts:
diff --git a/scripts/metrics/buildbots.py b/scripts/metrics/buildbots.py
new file mode 100755
index 0000000..a05b704
--- /dev/null
+++ b/scripts/metrics/buildbots.py
@@ -0,0 +1,187 @@
+#!/usr/bin/env python3
+# Copyright 2019 Google LLC
+#
+# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://llvm.org/LICENSE.txt
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Count buildbot build results and upload them as metrics to Stackdriver."""
+
+import datetime
+from datetime import date
+
+import requests
+from google.cloud import monitoring_v3
+
+BASE_URL = 'http://lab.llvm.org:8011/json/builders'
+GCP_PROJECT_ID = 'llvm-premerge-checks'
+# Seconds to wait for the buildbot master to respond before giving up, so
+# that an unresponsive server cannot hang the script forever.
+HTTP_TIMEOUT_SECONDS = 60
+
+
+class BuildStats:
+    """Build statistics.
+
+    Plain data object counting successful and failed builds.
+    """
+
+    successful = 0  # type: int
+    failed = 0  # type: int
+
+    def __init__(self, successful: int = 0, failed: int = 0):
+        self.successful = successful
+        self.failed = failed
+
+    def add(self, success: bool):
+        """Count one more build as successful or failed."""
+        if success:
+            self.successful += 1
+        else:
+            self.failed += 1
+
+    @property
+    def total(self) -> int:
+        """Number of builds counted so far."""
+        return self.successful + self.failed
+
+    @property
+    def percent_failed(self) -> float:
+        """Share of failed builds in percent; 0.0 if no build was counted."""
+        if self.total == 0:
+            # Avoid a ZeroDivisionError when no build fell into the window.
+            return 0.0
+        return 100.0 * self.failed / self.total
+
+    def __add__(self, other: "BuildStats") -> "BuildStats":
+        """Return a new object with the summed counters of both operands."""
+        return BuildStats(
+            self.successful + other.successful,
+            self.failed + other.failed)
+
+    def __str__(self) -> str:
+        result = [
+            'successful: {}'.format(self.successful),
+            'failed: {}'.format(self.failed),
+            'total: {}'.format(self.total),
+            '% failed: {:0.1f}'.format(self.percent_failed),
+        ]
+        return '\n'.join(result)
+
+
+def _get_json(url: str):
+    """Fetch `url` and return the decoded JSON body.
+
+    Raises requests.HTTPError for 4xx/5xx responses instead of trying to
+    parse an error page as JSON.
+    """
+    response = requests.get(url, timeout=HTTP_TIMEOUT_SECONDS)
+    response.raise_for_status()
+    return response.json()
+
+
+def _metric_type(name: str) -> str:
+    """Return the Stackdriver metric type for the metric `name`.
+
+    NOTE(review): the names passed in already start with "buildbots_", so the
+    prefix appears twice in the result. This is kept unchanged so that the
+    descriptors and the written time series use the same identifiers.
+    """
+    return 'custom.googleapis.com/buildbots_{}'.format(name)
+
+
+def get_buildbot_stats(time_window: datetime.datetime) -> BuildStats:
+    """Get the statistics for all builders.
+
+    Only builds started at or after `time_window` are counted.
+    """
+    print('getting list of builders...')
+    stats = BuildStats()
+    for builder in _get_json(BASE_URL):
+        # TODO: maybe filter the builds to the ones we care about
+        stats += get_builder_stats(builder, time_window)
+    return stats
+
+
+def get_builder_stats(builder: str, time_window: datetime.datetime) -> BuildStats:
+    """Get the statistics for one builder.
+
+    Only builds started at or after `time_window` are counted.
+    """
+    print('Gettings builds for {}...'.format(builder))
+    # TODO: can we limit the data we're requesting?
+    url = '{}/{}/builds/_all'.format(BASE_URL, builder)
+    stats = BuildStats()
+    for results in _get_json(url).values():
+        # The first entry of 'times' is used as the start time of the build.
+        start_time = datetime.datetime.fromtimestamp(float(results['times'][0]))
+        if start_time < time_window:
+            continue
+        successful = results['text'] == ['build', 'successful']
+        stats.add(successful)
+    return stats
+
+
+def gcp_create_metric_descriptor(project_id: str):
+    """Create metric descriptors on Stackdriver.
+
+    Re-creating these with every call is fine.
+    """
+    client = monitoring_v3.MetricServiceClient()
+    project_name = client.project_path(project_id)
+
+    for desc_type, desc_desc in [
+        ["buildbots_percent_failed", "Percentage of failed builds"],
+        ["buildbots_builds_successful", "Number of successful builds in the last 24h."],
+        ["buildbots_builds_failed", "Number of failed builds in the last 24h."],
+        ["buildbots_builds_total", "Total number of builds in the last 24h."],
+    ]:
+        descriptor = monitoring_v3.types.MetricDescriptor()
+        descriptor.type = _metric_type(desc_type)
+        descriptor.metric_kind = (
+            monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE)
+        descriptor.value_type = (
+            monitoring_v3.enums.MetricDescriptor.ValueType.DOUBLE)
+        descriptor.description = desc_desc
+        descriptor = client.create_metric_descriptor(project_name, descriptor)
+        print('Created {}.'.format(descriptor.name))
+
+
+def gcp_write_data(project_id: str, stats: BuildStats):
+    """Upload metrics to Stackdriver.
+
+    Writes one data point per metric, timestamped with the current time.
+    """
+    client = monitoring_v3.MetricServiceClient()
+    project_name = client.project_path(project_id)
+    now = datetime.datetime.now()
+
+    for desc_type, value in [
+        ["buildbots_percent_failed", stats.percent_failed],
+        ["buildbots_builds_successful", stats.successful],
+        ["buildbots_builds_failed", stats.failed],
+        ["buildbots_builds_total", stats.total],
+    ]:
+        series = monitoring_v3.types.TimeSeries()
+        series.metric.type = _metric_type(desc_type)
+        series.resource.type = 'global'
+        point = series.points.add()
+        point.value.double_value = value
+        point.interval.end_time.seconds = int(now.timestamp())
+        client.create_time_series(project_name, [series])
+
+
+if __name__ == '__main__':
+    gcp_create_metric_descriptor(GCP_PROJECT_ID)
+    stats = get_buildbot_stats(
+        datetime.datetime.now() - datetime.timedelta(hours=24))
+    gcp_write_data(GCP_PROJECT_ID, stats)
+    print(stats)
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index 511c573..d18f663 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -4,4 +5,5 @@ gitpython==3.0.5
 retrying==1.3.3
 pathspec==0.7.0
 pyaml==19.12.0
-unidiff==0.5.5
\ No newline at end of file
+unidiff==0.5.5
+requests==2.22.0
\ No newline at end of file