1
0
Fork 0

counting failed builds on buildbots

storing results on Stackdriver
This commit is contained in:
Christian Kühnel 2020-02-17 13:10:52 +01:00
parent dd375f71db
commit ad0ad7ccc5
3 changed files with 155 additions and 5 deletions

View file

@ -5,15 +5,16 @@ a set of metrics. This doc will summarize the metrics and tools. All of the data
shall be collected as time series, so that we can see changes over time. shall be collected as time series, so that we can see changes over time.
* Impact - The metrics we ultimately want to improve * Impact - The metrics we ultimately want to improve
* Percentage of [build-bot builds](http://lab.llvm.org:8011/) on master failing. * Percentage of [build-bot builds](http://lab.llvm.org:8011/) on master
failing. (Buildbot_percentage_failing)
* Time to fix a broken master build: Time between start of failing builds * Time to fix a broken master build: Time between start of failing builds
until the build is fixed. until the build is fixed. (BuildBot_time_to_fix)
* Percentage of Revisions on Phabricator where a broken build was fixed * Percentage of Revisions on Phabricator where a broken build was fixed
afterwards. This would indicate that a bug was found and fixed during afterwards. This would indicate that a bug was found and fixed during
the code review phase. the code review phase. (Premerge_fixes)
* Number of reverts on master. This indicates that something was broken on * Number of reverts on master. This indicates that something was broken on
master that slipped through the pre-merge tests or was submitted without master that slipped through the pre-merge tests or was submitted without
any review. any review. (Upstream_reverts)
* Users and behavior - Interesting to see and useful to adapt our approach. * Users and behavior - Interesting to see and useful to adapt our approach.
* Percentage of commits to master that went through Phabricator. * Percentage of commits to master that went through Phabricator.
@ -45,6 +46,13 @@ shall be collected as time series, so that we can see changes over time.
* Send out alerts/notifications. * Send out alerts/notifications.
* Show live data in charts. * Show live data in charts.
# Data sources
This section will explain where we can get the data from.
* build bot statistics
# Solution # Solution
We need to find solutions for these parts: We need to find solutions for these parts:

141
scripts/metrics/buildbots.py Executable file
View file

@ -0,0 +1,141 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import date
import requests
import datetime
from google.cloud import monitoring_v3
# Root URL of the buildbot JSON API; the builder list is fetched from it and
# the per-builder build URLs are derived from it.
BASE_URL = 'http://lab.llvm.org:8011/json/builders'
# GCP project that the Stackdriver metric descriptors and time series are
# written to.
GCP_PROJECT_ID = 'llvm-premerge-checks'
class BuildStats:
    """Counters for successful and failed builds.

    Plain data object. Two instances can be combined with ``+``, which
    returns a new object holding the summed counters.
    """

    successful = 0  # type: int
    failed = 0  # type: int

    def __init__(self, successful: int = 0, failed: int = 0):
        """Create a statistics object with the given initial counters."""
        self.successful = successful
        self.failed = failed

    def add(self, success: bool):
        """Count one more build, as successful or failed."""
        if success:
            self.successful += 1
        else:
            self.failed += 1

    @property
    def total(self) -> int:
        """Number of builds counted so far (successful plus failed)."""
        return self.successful + self.failed

    @property
    def percent_failed(self) -> float:
        """Share of failed builds in percent.

        Returns 0.0 when no builds were counted at all, so that an empty
        statistics object can still be printed and reported instead of
        raising ZeroDivisionError.
        """
        total = self.total
        if total == 0:
            return 0.0
        return 100.0 * self.failed / total

    def __add__(self, other: "BuildStats") -> "BuildStats":
        """Return a new object with the counters of both operands summed."""
        return BuildStats(
            self.successful + other.successful,
            self.failed + other.failed)

    def __str__(self) -> str:
        """Return a multi-line, human readable summary of the counters."""
        result = [
            'successful: {}'.format(self.successful),
            'failed: {}'.format(self.failed),
            'total: {}'.format(self.total),
            '% failed: {:0.1f}'.format(self.percent_failed),
        ]
        return '\n'.join(result)
def get_buildbot_stats(time_window: datetime.datetime,
                       timeout: float = 120.0) -> BuildStats:
    """Get the combined statistics of all builders.

    Args:
        time_window: Forwarded to get_builder_stats() for every builder;
            builds that started before this point in time are not counted.
        timeout: Seconds to wait for the server when requesting the list of
            builders. Without a timeout a stalled server would block the
            script forever.

    Returns:
        The sum of the statistics of all builders.

    Raises:
        requests.HTTPError: If the server answers the builder list request
            with an error status.
    """
    print('getting list of builders...')
    response = requests.get(BASE_URL, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON decoding error.
    response.raise_for_status()
    stats = BuildStats()
    # The JSON object is keyed by builder name.
    for builder in response.json():
        # TODO: maybe filter the builds to the ones we care about
        stats += get_builder_stats(builder, time_window)
    return stats
def get_builder_stats(builder: str, time_window: datetime.datetime,
                      timeout: float = 120.0) -> BuildStats:
    """Get the statistics for one builder.

    Args:
        builder: Name of the builder, used as path component of the URL.
        time_window: Builds that started before this point in time are
            skipped. Must be a naive datetime in local time, as it is
            compared against datetime.fromtimestamp() results.
        timeout: Seconds to wait for the server. Without a timeout a stalled
            server would block the script forever.

    Returns:
        Counters of successful and failed builds inside the time window.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    print('Gettings builds for {}...'.format(builder))
    # TODO: can we limit the data we're requesting?
    url = '{}/{}/builds/_all'.format(BASE_URL, builder)
    response = requests.get(url, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON decoding error.
    response.raise_for_status()
    stats = BuildStats()
    # Only the build records are needed, not the keys they are stored under.
    for results in response.json().values():
        # The first entry of 'times' is used as the start of the build.
        start_time = datetime.datetime.fromtimestamp(float(results['times'][0]))
        if start_time < time_window:
            continue
        # NOTE(review): every build whose text is not exactly
        # ['build', 'successful'] is counted as failed - presumably this also
        # covers builds that are still running; confirm against the JSON API.
        stats.add(results['text'] == ['build', 'successful'])
    return stats
def gcp_create_metric_descriptor(project_id: str):
    """Register the buildbot metric descriptors on Stackdriver.

    One GAUGE/DOUBLE descriptor is created per metric in the given project.
    Re-creating these with every call is fine.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    metrics = (
        ("buildbots_percent_failed", "Percentage of failed builds"),
        ("buildbots_builds_successful", "Number of successful builds in the last 24h."),
        ("buildbots_builds_failed", "Number of failed builds in the last 24h."),
        ("buildbots_builds_total", "Total number of builds in the last 24h."),
    )
    for metric_name, description in metrics:
        spec = monitoring_v3.types.MetricDescriptor()
        # The names above already start with "buildbots_", so the resulting
        # type carries that prefix twice. gcp_write_data() builds its metric
        # types the same way, so both sides agree.
        spec.type = 'custom.googleapis.com/buildbots_' + metric_name
        spec.metric_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
        spec.value_type = monitoring_v3.enums.MetricDescriptor.ValueType.DOUBLE
        spec.description = description
        created = metric_client.create_metric_descriptor(project_path, spec)
        print('Created {}.'.format(created.name))
def gcp_write_data(project_id: str, stats: BuildStats):
    """Write one data point per buildbot metric to Stackdriver.

    All points share the same end time (the moment this function was called)
    and are attached to the 'global' resource of the given project.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    # Taken once so that every metric of this run gets the same timestamp.
    end_seconds = int(datetime.datetime.now().timestamp())
    samples = (
        ("buildbots_percent_failed", stats.percent_failed),
        ("buildbots_builds_successful", stats.successful),
        ("buildbots_builds_failed", stats.failed),
        ("buildbots_builds_total", stats.total),
    )
    for metric_name, sample in samples:
        time_series = monitoring_v3.types.TimeSeries()
        time_series.metric.type = 'custom.googleapis.com/buildbots_' + metric_name
        time_series.resource.type = 'global'
        data_point = time_series.points.add()
        data_point.value.double_value = sample
        data_point.interval.end_time.seconds = end_seconds
        # Each metric is uploaded with its own request.
        metric_client.create_time_series(project_path, [time_series])
if __name__ == '__main__':
    # Script entry point: make sure the metric descriptors exist, collect the
    # build statistics of the last 24 hours, upload them and print a summary.
    gcp_create_metric_descriptor(GCP_PROJECT_ID)
    window_start = datetime.datetime.now() - datetime.timedelta(hours=24)
    stats = get_buildbot_stats(window_start)
    gcp_write_data(GCP_PROJECT_ID, stats)
    print(stats)

View file

@ -5,3 +5,4 @@ retrying==1.3.3
pathspec==0.7.0 pathspec==0.7.0
pyaml==19.12.0 pyaml==19.12.0
unidiff==0.5.5 unidiff==0.5.5
requests==2.22.0