counting failed builds on buildbots
storing results on Stackdriver
This commit is contained in:
parent
dd375f71db
commit
ad0ad7ccc5
3 changed files with 155 additions and 5 deletions
|
@ -5,15 +5,16 @@ a set of metrics. This doc will summarize the metrics and tools. All of the data
|
|||
shall be collected as time series, so that we can see changes over time.
|
||||
|
||||
* Impact - The metrics we ultimately want to improve
|
||||
* Percentage of [build-bot build](http://lab.llvm.org:8011/) on master failing.
|
||||
* Percentage of [build-bot build](http://lab.llvm.org:8011/) on master
|
||||
failing. (Buildbot_percentage_failing)
|
||||
* Time to fix a broken master build: Time between start of failing builds
|
||||
until the build is fixed.
|
||||
until the build is fixed. (BuildBot_time_to_fix)
|
||||
* Percentage of Revisions on Phabricator where a broken build was fixed
|
||||
afterwards. This would indicate that a bug was found and fixed during
|
||||
the code review phase.
|
||||
the code review phase. (Premerge_fixes)
|
||||
* Number of reverts on master. This indicates that something was broken on
|
||||
master that slipped through the pre-merge tests or was submitted without
|
||||
any review.
|
||||
any review. (Upstream_reverts)
|
||||
|
||||
* Users and behavior - Interesting to see and useful to adapt our approach.
|
||||
* Percentage of commits to master that went through Phabricator.
|
||||
|
@ -45,6 +46,13 @@ shall be collected as time series, so that we can see changes over time.
|
|||
* Send out alerts/notifications.
|
||||
* Show live data in charts.
|
||||
|
||||
|
||||
# Data sources
|
||||
|
||||
This section will explain where we can get the data from.
|
||||
|
||||
* build bot statistics
|
||||
|
||||
# Solution
|
||||
|
||||
We need to find solutions for these parts:
|
||||
|
|
141
scripts/metrics/buildbots.py
Executable file
141
scripts/metrics/buildbots.py
Executable file
|
@ -0,0 +1,141 @@
|
|||
#!/usr/bin/env python3
|
||||
# Copyright 2019 Google LLC
|
||||
#
|
||||
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://llvm.org/LICENSE.txt
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
from datetime import date
|
||||
import requests
|
||||
import datetime
|
||||
from google.cloud import monitoring_v3
|
||||
|
||||
BASE_URL = 'http://lab.llvm.org:8011/json/builders'
|
||||
GCP_PROJECT_ID = 'llvm-premerge-checks'
|
||||
|
||||
class BuildStats:
    """Build statistics.

    Plain data object holding the number of successful and failed builds.
    Two instances can be combined with ``+`` to aggregate their counters.
    """

    successful = 0  # type: int
    failed = 0  # type: int

    def __init__(self, successful: int = 0, failed: int = 0):
        self.successful = successful
        self.failed = failed

    def add(self, success: bool):
        """Record one build, counted as successful if `success` else failed."""
        if success:
            self.successful += 1
        else:
            self.failed += 1

    @property
    def total(self) -> int:
        """Number of recorded builds (successful plus failed)."""
        return self.successful + self.failed

    @property
    def percent_failed(self) -> float:
        """Percentage (0-100) of the recorded builds that failed.

        Returns 0.0 when no builds were recorded, so that an empty
        statistic can still be printed and reported instead of raising
        ZeroDivisionError.
        """
        total = self.total
        if total == 0:
            return 0.0
        return 100.0 * self.failed / total

    def __add__(self, other: "BuildStats") -> "BuildStats":
        """Return a new object with the summed counters of both operands."""
        return BuildStats(
            self.successful + other.successful,
            self.failed + other.failed)

    def __str__(self) -> str:
        """Return a multi-line, human readable summary of the counters."""
        result = [
            'successful: {}'.format(self.successful),
            'failed: {}'.format(self.failed),
            'total: {}'.format(self.total),
            '% failed: {:0.1f}'.format(self.percent_failed),
        ]
        return '\n'.join(result)
|
||||
|
||||
|
||||
def get_buildbot_stats(time_window : datetime.datetime) -> BuildStats:
    """Get the combined statistics for all builders.

    Downloads the builder list from BASE_URL and adds up the results of
    get_builder_stats() for every builder. `time_window` is handed on to
    get_builder_stats() unchanged.
    """
    print('getting list of builders...')
    builder_names = requests.get(BASE_URL).json().keys()
    # TODO: maybe filter the builds to the ones we care about
    per_builder = (
        get_builder_stats(name, time_window) for name in builder_names)
    # BuildStats supports "+", so sum() can fold the per-builder results
    # starting from an empty BuildStats.
    return sum(per_builder, BuildStats())
|
||||
|
||||
|
||||
def get_builder_stats(builder: str, time_window: datetime.datetime) -> BuildStats:
    """Get the statistics for one builder.

    Requests all builds of `builder` and counts those that did not start
    before `time_window`. A build is counted as successful only if its
    'text' entry equals ['build', 'successful']; every other build is
    counted as failed.
    """
    print('Gettings builds for {}...'.format(builder))
    # TODO: can we limit the data we're requesting?
    all_builds = requests.get(
        '{}/{}/builds/_all'.format(BASE_URL, builder)).json()
    counts = BuildStats()
    for info in all_builds.values():
        # The first entry of 'times' is used as the start of the build.
        # NOTE(review): presumably a unix timestamp, which fromtimestamp()
        # turns into naive local time - confirm `time_window` is
        # naive local time as well.
        started = datetime.datetime.fromtimestamp(float(info['times'][0]))
        if started >= time_window:
            counts.add(info['text'] == ['build', 'successful'])
    return counts
|
||||
|
||||
|
||||
def gcp_create_metric_descriptor(project_id: str):
    """Create metric descriptors on Stackdriver.

    One gauge metric of value type double is created per entry of the
    table below, in the project given by `project_id`.

    Re-creating these with every call is fine.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    descriptor_enums = monitoring_v3.enums.MetricDescriptor

    # (metric name, human readable description)
    metrics = (
        ("buildbots_percent_failed", "Percentage of failed builds"),
        ("buildbots_builds_successful", "Number of successful builds in the last 24h."),
        ("buildbots_builds_failed", "Number of failed builds in the last 24h."),
        ("buildbots_builds_total", "Total number of builds in the last 24h."),
    )

    for metric_name, description in metrics:
        new_descriptor = monitoring_v3.types.MetricDescriptor()
        # NOTE(review): the names above already start with "buildbots_", so
        # the resulting type contains the prefix twice. gcp_write_data()
        # builds its metric types the same way - confirm before renaming.
        new_descriptor.type = 'custom.googleapis.com/buildbots_{}'.format(metric_name)
        new_descriptor.metric_kind = descriptor_enums.MetricKind.GAUGE
        new_descriptor.value_type = descriptor_enums.ValueType.DOUBLE
        new_descriptor.description = description
        created = metric_client.create_metric_descriptor(
            project_path, new_descriptor)
        print('Created {}.'.format(created.name))
|
||||
|
||||
|
||||
def gcp_write_data(project_id: str, stats: BuildStats):
    """Upload metrics to Stackdriver.

    Writes one data point per metric, taken from `stats` and stamped with
    the current time, to the project given by `project_id`. Each metric is
    sent in its own create_time_series() request.
    """
    metric_client = monitoring_v3.MetricServiceClient()
    project_path = metric_client.project_path(project_id)
    # All points of one upload share the same end time (whole seconds).
    end_seconds = int(datetime.datetime.now().timestamp())

    # (metric name, value to report)
    samples = (
        ("buildbots_percent_failed", stats.percent_failed),
        ("buildbots_builds_successful", stats.successful),
        ("buildbots_builds_failed", stats.failed),
        ("buildbots_builds_total", stats.total),
    )

    for metric_name, sample in samples:
        time_series = monitoring_v3.types.TimeSeries()
        time_series.metric.type = 'custom.googleapis.com/buildbots_{}'.format(metric_name)
        time_series.resource.type = 'global'
        data_point = time_series.points.add()
        # Every value, including the integer counters, is sent as a double.
        data_point.value.double_value = sample
        data_point.interval.end_time.seconds = end_seconds
        metric_client.create_time_series(project_path, [time_series])
|
||||
|
||||
if __name__ == '__main__':
    # The metric descriptors are (re-)created before any data is collected.
    gcp_create_metric_descriptor(GCP_PROJECT_ID)
    # Only builds that started within the last 24 hours are counted.
    window_start = datetime.datetime.now() - datetime.timedelta(hours=24)
    stats = get_buildbot_stats(window_start)
    gcp_write_data(GCP_PROJECT_ID, stats)
    print(stats)
|
|
@ -4,4 +4,5 @@ gitpython==3.0.5
|
|||
retrying==1.3.3
|
||||
pathspec==0.7.0
|
||||
pyaml==19.12.0
|
||||
unidiff==0.5.5
|
||||
unidiff==0.5.5
|
||||
requests==2.22.0
|
Loading…
Reference in a new issue