1
0
Fork 0

counting failed builds on buildbots

storing results on Stackdriver
This commit is contained in:
Christian Kühnel 2020-02-17 13:10:52 +01:00
parent dd375f71db
commit ad0ad7ccc5
3 changed files with 155 additions and 5 deletions

View file

@ -5,15 +5,16 @@ a set of metrics. This doc will summarize the metrics and tools. All of the data
shall be collected as time series, so that we can see changes over time.
* Impact - The metrics we ultimately want to improve
* Percentage of [build-bot build](http://lab.llvm.org:8011/) on master failing.
* Percentage of [build-bot build](http://lab.llvm.org:8011/) on master
failing. (Buildbot_percentage_failing)
* Time to fix a broken master build: Time between start of failing builds
until the build is fixed.
until the build is fixed. (BuildBot_time_to_fix)
* Percentage of Revisions on Phabricator where a broken build was fixed
afterwards. This would indicate that a bug was found and fixed during
the code review phase.
the code review phase. (Premerge_fixes)
* Number of reverts on master. This indicates that something was broken on
master that slipped through the pre-merge tests or was submitted without
any review.
any review. (Upstream_reverts)
* Users and behavior - Interesting to see and useful to adapt our approach.
* Percentage of commits to master that went through Phabricator.
@ -45,6 +46,13 @@ shall be collected as time series, so that we can see changes over time.
* Send out alerts/notifications.
* Show live data in charts.
# Data sources
This section will explain where we can get the data from.
* build bot statistics
# Solution
We need to find solutions for these parts:

141
scripts/metrics/buildbots.py Executable file
View file

@ -0,0 +1,141 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from datetime import date
import requests
import datetime
from google.cloud import monitoring_v3
BASE_URL = 'http://lab.llvm.org:8011/json/builders'
GCP_PROJECT_ID = 'llvm-premerge-checks'
class BuildStats:
"""Build statistics.
Plain data object.
"""
successful = 0 # type: int
failed = 0 # type: int
def __init__(self, successful:int = 0, failed:int = 0):
self.successful = successful
self.failed = failed
def add(self, success: bool):
if success:
self.successful += 1
else:
self.failed += 1
@property
def total(self) -> int:
return self.successful + self.failed
@property
def percent_failed(self) -> float:
return 100.0 * self.failed / self.total
def __add__(self, other: "BuildStats") -> "BuildStats":
return BuildStats(
self.successful + other.successful,
self.failed + other.failed)
def __str__(self) -> str:
result = [
'successful: {}'.format(self.successful),
'failed: {}'.format(self.failed),
'total: {}'.format(self.total),
'% failed: {:0.1f}'.format(self.percent_failed),
]
return '\n'.join(result)
def get_buildbot_stats(time_window : datetime.datetime) -> BuildStats:
"""Get the statistics for the all builders."""
print('getting list of builders...')
stats = BuildStats()
for builder in requests.get(BASE_URL).json().keys():
# TODO: maybe filter the builds to the ones we care about
stats += get_builder_stats(builder, time_window )
return stats
def get_builder_stats(builder: str, time_window: datetime.datetime) -> BuildStats:
"""Get the statistics for one builder."""
print('Gettings builds for {}...'.format(builder))
# TODO: can we limit the data we're requesting?
url = '{}/{}/builds/_all'.format(BASE_URL, builder)
stats = BuildStats()
for build, results in requests.get(url).json().items():
start_time = datetime.datetime.fromtimestamp(float(results['times'][0]))
if start_time < time_window:
continue
successful = results['text'] == ['build', 'successful']
stats.add(successful)
return stats
def gcp_create_metric_descriptor(project_id: str):
"""Create metric descriptors on Stackdriver.
Re-creating these with every call is fine."""
client = monitoring_v3.MetricServiceClient()
project_name = client.project_path(project_id)
for desc_type, desc_desc in [
["buildbots_percent_failed", "Percentage of failed builds"],
["buildbots_builds_successful", "Number of successful builds in the last 24h."],
["buildbots_builds_failed", "Number of failed builds in the last 24h."],
["buildbots_builds_total", "Total number of builds in the last 24h."],
]:
descriptor = monitoring_v3.types.MetricDescriptor()
descriptor.type = 'custom.googleapis.com/buildbots_{}'.format(desc_type)
descriptor.metric_kind = (
monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE)
descriptor.value_type = (
monitoring_v3.enums.MetricDescriptor.ValueType.DOUBLE)
descriptor.description = desc_desc
descriptor = client.create_metric_descriptor(project_name, descriptor)
print('Created {}.'.format(descriptor.name))
def gcp_write_data(project_id: str, stats: BuildStats):
"""Upload metrics to Stackdriver."""
client = monitoring_v3.MetricServiceClient()
project_name = client.project_path(project_id)
now = datetime.datetime.now()
for desc_type, value in [
["buildbots_percent_failed", stats.percent_failed],
["buildbots_builds_successful", stats.successful],
["buildbots_builds_failed", stats.failed],
["buildbots_builds_total", stats.total],
]:
series = monitoring_v3.types.TimeSeries()
series.metric.type = 'custom.googleapis.com/buildbots_{}'.format(desc_type)
series.resource.type = 'global'
point = series.points.add()
point.value.double_value = value
point.interval.end_time.seconds = int(now.timestamp())
client.create_time_series(project_name, [series])
if __name__ == '__main__':
gcp_create_metric_descriptor(GCP_PROJECT_ID)
stats = get_buildbot_stats(
datetime.datetime.now() - datetime.timedelta(hours=24))
gcp_write_data(GCP_PROJECT_ID, stats)
print(stats)

View file

@ -5,3 +5,4 @@ retrying==1.3.3
pathspec==0.7.0
pyaml==19.12.0
unidiff==0.5.5
requests==2.22.0