counting failed builds on buildbots
storing results on Stackdriver
This commit is contained in:
parent
dd375f71db
commit
ad0ad7ccc5
3 changed files with 155 additions and 5 deletions
|
@ -5,15 +5,16 @@ a set of metrics. This doc will summarize the metrics and tools. All of the data
|
||||||
shall be collected as time series, so that we can see changes over time.
|
shall be collected as time series, so that we can see changes over time.
|
||||||
|
|
||||||
* Impact - The metrics we ultimately want to improve
|
* Impact - The metrics we ultimately want to improve
|
||||||
* Percentage of [build-bot build](http://lab.llvm.org:8011/) on master failing.
|
* Percentage of [build-bot builds](http://lab.llvm.org:8011/) on master
|
||||||
|
failing. (Buildbot_percentage_failing)
|
||||||
* Time to fix a broken master build: Time between start of failing builds
|
* Time to fix a broken master build: Time between start of failing builds
|
||||||
until the build is fixed.
|
until the build is fixed. (BuildBot_time_to_fix)
|
||||||
* Percentage of Revisions on Phabricator where a broken build was fixed
|
* Percentage of Revisions on Phabricator where a broken build was fixed
|
||||||
afterwards. This would indicate that a bug was found and fixed during
|
afterwards. This would indicate that a bug was found and fixed during
|
||||||
the code review phase.
|
the code review phase. (Premerge_fixes)
|
||||||
* Number of reverts on master. This indicates that something was broken on
|
* Number of reverts on master. This indicates that something was broken on
|
||||||
master that slipped through the pre-merge tests or was submitted without
|
master that slipped through the pre-merge tests or was submitted without
|
||||||
any review.
|
any review. (Upstream_reverts)
|
||||||
|
|
||||||
* Users and behavior - Interesting to see and useful to adapt our approach.
|
* Users and behavior - Interesting to see and useful to adapt our approach.
|
||||||
* Percentage of commits to master that went through Phabricator.
|
* Percentage of commits to master that went through Phabricator.
|
||||||
|
@ -45,6 +46,13 @@ shall be collected as time series, so that we can see changes over time.
|
||||||
* Send out alerts/notifications.
|
* Send out alerts/notifications.
|
||||||
* Show live data in charts.
|
* Show live data in charts.
|
||||||
|
|
||||||
|
|
||||||
|
# Data sources
|
||||||
|
|
||||||
|
This section will explain where we can get the data from.
|
||||||
|
|
||||||
|
* build bot statistics
|
||||||
|
|
||||||
# Solution
|
# Solution
|
||||||
|
|
||||||
We need to find solutions for these parts:
|
We need to find solutions for these parts:
|
||||||
|
|
141
scripts/metrics/buildbots.py
Executable file
141
scripts/metrics/buildbots.py
Executable file
|
@ -0,0 +1,141 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# Copyright 2019 Google LLC
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://llvm.org/LICENSE.txt
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
from datetime import date
|
||||||
|
import requests
|
||||||
|
import datetime
|
||||||
|
from google.cloud import monitoring_v3
|
||||||
|
|
||||||
|
BASE_URL = 'http://lab.llvm.org:8011/json/builders'
|
||||||
|
GCP_PROJECT_ID = 'llvm-premerge-checks'
|
||||||
|
|
||||||
|
class BuildStats:
    """Counters for successful and failed builds.

    Plain data object. Two instances can be combined with ``+`` (and
    therefore ``+=``), which returns a new object holding the summed
    counters of both operands.
    """

    successful = 0  # type: int
    failed = 0  # type: int

    def __init__(self, successful: int = 0, failed: int = 0):
        """Create a stats object, optionally pre-populated with counts."""
        self.successful = successful
        self.failed = failed

    def add(self, success: bool):
        """Record one build as successful if `success` is true, else as failed."""
        if success:
            self.successful += 1
        else:
            self.failed += 1

    @property
    def total(self) -> int:
        """Number of builds recorded, successful and failed together."""
        return self.successful + self.failed

    @property
    def percent_failed(self) -> float:
        """Share of failed builds in percent (0.0 - 100.0).

        Returns 0.0 when no builds have been recorded at all, so that an
        empty result (e.g. no builds inside the time window) can still be
        printed and reported instead of raising ZeroDivisionError.
        """
        total = self.total
        if total == 0:
            return 0.0
        return 100.0 * self.failed / total

    def __add__(self, other: "BuildStats") -> "BuildStats":
        """Return a new BuildStats with the counters of both operands summed."""
        return BuildStats(
            self.successful + other.successful,
            self.failed + other.failed)

    def __str__(self) -> str:
        """Render all counters, one per line, for console output."""
        result = [
            'successful: {}'.format(self.successful),
            'failed: {}'.format(self.failed),
            'total: {}'.format(self.total),
            '% failed: {:0.1f}'.format(self.percent_failed),
        ]
        return '\n'.join(result)
|
||||||
|
|
||||||
|
|
||||||
|
def get_buildbot_stats(time_window: datetime.datetime) -> BuildStats:
    """Get the combined statistics of all builders.

    Fetches the list of builders from BASE_URL and sums up the statistics
    that get_builder_stats() returns for each of them.

    Args:
        time_window: cut-off time, handed on unchanged to
            get_builder_stats() for every builder.

    Returns:
        The sum of the per-builder statistics.

    Raises:
        requests.RequestException: if the builder list cannot be fetched
            (connection problem, timeout or HTTP error status).
    """
    print('getting list of builders...')
    # Without a timeout requests waits indefinitely on an unresponsive
    # server; the value bounds connect/read inactivity, not the whole
    # download.
    response = requests.get(BASE_URL, timeout=60)
    # Surface HTTP errors as such instead of as a confusing JSON error.
    response.raise_for_status()
    stats = BuildStats()
    # The JSON document is keyed by builder name.
    for builder in response.json():
        # TODO: maybe filter the builds to the ones we care about
        stats += get_builder_stats(builder, time_window)
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def get_builder_stats(builder: str, time_window: datetime.datetime) -> BuildStats:
    """Get the statistics for one builder.

    Args:
        builder: name of the builder, used as a path component of the URL.
        time_window: cut-off time; builds that started before it are
            ignored.

    Returns:
        Counts of successful and failed builds that started at or after
        `time_window`.

    Raises:
        requests.RequestException: if the builds cannot be fetched
            (connection problem, timeout or HTTP error status).
    """
    print('Gettings builds for {}...'.format(builder))
    # TODO: can we limit the data we're requesting?
    url = '{}/{}/builds/_all'.format(BASE_URL, builder)
    # Without a timeout requests waits indefinitely on an unresponsive
    # server; the value bounds connect/read inactivity, not the whole
    # download.
    response = requests.get(url, timeout=60)
    # Surface HTTP errors as such instead of as a confusing JSON error.
    response.raise_for_status()
    stats = BuildStats()
    # Only the build records are needed, not the keys they are stored under.
    for results in response.json().values():
        # The first entry of 'times' is read as the start time (epoch
        # seconds), converted to a naive local datetime like `time_window`.
        start_time = datetime.datetime.fromtimestamp(float(results['times'][0]))
        if start_time < time_window:
            continue
        # NOTE(review): every build whose text is not exactly
        # ['build', 'successful'] is counted as failed - presumably this
        # also covers builds that are still running; confirm.
        stats.add(results['text'] == ['build', 'successful'])
    return stats
|
||||||
|
|
||||||
|
|
||||||
|
def gcp_create_metric_descriptor(project_id: str):
    """Register the build bot metric descriptors on Stackdriver.

    One GAUGE/DOUBLE descriptor is created per metric. Calling this again
    for descriptors that already exist is fine.

    Args:
        project_id: ID of the GCP project the descriptors are created in.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    gauge_kind = monitoring_v3.enums.MetricDescriptor.MetricKind.GAUGE
    double_type = monitoring_v3.enums.MetricDescriptor.ValueType.DOUBLE

    metric_descriptions = (
        ("buildbots_percent_failed", "Percentage of failed builds"),
        ("buildbots_builds_successful", "Number of successful builds in the last 24h."),
        ("buildbots_builds_failed", "Number of failed builds in the last 24h."),
        ("buildbots_builds_total", "Total number of builds in the last 24h."),
    )

    for metric_name, description in metric_descriptions:
        new_descriptor = monitoring_v3.types.MetricDescriptor()
        # NOTE(review): the names above already start with "buildbots_", so
        # the resulting type reads ".../buildbots_buildbots_<metric>". The
        # writer of the time series must use the same type string.
        new_descriptor.type = 'custom.googleapis.com/buildbots_{}'.format(metric_name)
        new_descriptor.metric_kind = gauge_kind
        new_descriptor.value_type = double_type
        new_descriptor.description = description
        created = metric_client.create_metric_descriptor(project_path, new_descriptor)
        print('Created {}.'.format(created.name))
|
||||||
|
|
||||||
|
|
||||||
|
def gcp_write_data(project_id: str, stats: BuildStats):
    """Upload the build statistics to Stackdriver.

    Writes one data point per metric, all stamped with the time of this
    call, to the 'global' resource.

    Args:
        project_id: ID of the GCP project the time series are written to.
        stats: the statistics to upload.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    # All points share one timestamp, truncated to whole seconds.
    end_seconds = int(datetime.datetime.now().timestamp())

    metric_values = (
        ("buildbots_percent_failed", stats.percent_failed),
        ("buildbots_builds_successful", stats.successful),
        ("buildbots_builds_failed", stats.failed),
        ("buildbots_builds_total", stats.total),
    )

    for metric_name, metric_value in metric_values:
        time_series = monitoring_v3.types.TimeSeries()
        # NOTE(review): the names above already start with "buildbots_", so
        # the resulting type reads ".../buildbots_buildbots_<metric>"; it has
        # to match the type used when the descriptors were created.
        time_series.metric.type = 'custom.googleapis.com/buildbots_{}'.format(metric_name)
        time_series.resource.type = 'global'
        data_point = time_series.points.add()
        data_point.value.double_value = metric_value
        data_point.interval.end_time.seconds = end_seconds
        metric_client.create_time_series(project_path, [time_series])
|
||||||
|
|
||||||
|
if __name__ == '__main__':
    # Script entry point: make sure the metric descriptors exist, collect
    # the build results of the last 24 hours, upload and print them.
    gcp_create_metric_descriptor(GCP_PROJECT_ID)
    one_day = datetime.timedelta(hours=24)
    window_start = datetime.datetime.now() - one_day
    stats = get_buildbot_stats(window_start)
    gcp_write_data(GCP_PROJECT_ID, stats)
    print(stats)
|
|
@ -4,4 +4,5 @@ gitpython==3.0.5
|
||||||
retrying==1.3.3
|
retrying==1.3.3
|
||||||
pathspec==0.7.0
|
pathspec==0.7.0
|
||||||
pyaml==19.12.0
|
pyaml==19.12.0
|
||||||
unidiff==0.5.5
|
unidiff==0.5.5
|
||||||
|
requests==2.22.0
|
Loading…
Reference in a new issue