1
0
Fork 0

getting stats from buildbots

This commit is contained in:
Christian Kühnel 2020-03-31 16:56:35 +02:00
parent 51e01caa5d
commit 0e84fd14bb

View file

@ -0,0 +1,140 @@
#!/usr/bin/env python3
# Copyright 2019 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import csv
import datetime
import gzip
import mailbox
import os
import re
from typing import Dict, Iterator, List, Optional, Set

import requests
# URL template of one monthly llvm-dev pipermail archive. It is filled in with
# the four-digit year and the month name (see the `.format(year=..., month=...)`
# call in the scanner class).
EMAIL_ARCHIVE_URL = (
    'http://lists.llvm.org/pipermail/llvm-dev/'
    '{year}-{month}.txt.gz'
)

# Working directory for downloaded archives, attachments and the merged
# result: a "tmp" folder located next to this script.
TMP_DIR = os.path.join(
    os.path.dirname(__file__),
    'tmp',
)
class LLVMBotArchiveScanner:
    """Collect weekly buildbot failure statistics from the llvm-dev archive.

    Typical workflow:

    1. `get_archives` downloads the monthly mailing list archives.
    2. `extract_emails` yields the mails whose subject contains
       "Buildbot numbers".
    3. `get_attachments` downloads the CSV linked from such a mail.
    4. `merge_results` merges all downloaded CSVs into `buildbot_weekly.csv`.

    All files live in one working directory; a file that already exists
    there is never downloaded again.
    """

    # English month names as they appear in the archive URLs. Spelled out
    # because strftime('%B') depends on the locale of the running process.
    _MONTH_NAMES = (
        'January', 'February', 'March', 'April', 'May', 'June', 'July',
        'August', 'September', 'October', 'November', 'December',
    )

    # Every gzip stream starts with these two bytes.
    _GZIP_MAGIC = b'\x1f\x8b'

    def __init__(self, tmpdir: Optional[str] = None):
        """Create a scanner.

        Args:
            tmpdir: working directory for downloads and results. Defaults to
                the module-level ``TMP_DIR``.
        """
        self._tmpdir = TMP_DIR if tmpdir is None else tmpdir

    @staticmethod
    def _generate_archive_url(month: datetime.date) -> str:
        """Return the archive URL for the month that contains `month`."""
        month_name = LLVMBotArchiveScanner._MONTH_NAMES[month.month - 1]
        return EMAIL_ARCHIVE_URL.format(year=month.year, month=month_name)

    @staticmethod
    def _maybe_gunzip(content: bytes) -> bytes:
        """Return `content` decompressed if it is a gzip stream, else as is.

        The check is done on the payload itself because the HTTP layer may or
        may not already have removed the compression.
        """
        if content.startswith(LLVMBotArchiveScanner._GZIP_MAGIC):
            return gzip.decompress(content)
        return content

    def _download_archive(self, month: datetime.date):
        """Download the archive of one month as plain text into the tmp dir."""
        filename = os.path.join(
            self._tmpdir,
            'llvmdev-{year}-{month:02d}.txt'.format(year=month.year, month=month.month))
        url = self._generate_archive_url(month)
        # The archive is served as .txt.gz but stored (and later parsed) as
        # plain text, so it has to be decompressed on the way in.
        self.download(url, filename, decompress=True)

    def get_archives(self, start_month: datetime.date):
        """Download all monthly archives from `start_month` up to today.

        Args:
            start_month: any day of the first month to fetch.
        """
        print('Downloading data...')
        month = start_month
        today = datetime.date.today()
        while month < today:
            self._download_archive(month)
            # Step to the first day of the following month.
            if month.month < 12:
                month = datetime.date(year=month.year, month=month.month + 1, day=1)
            else:
                month = datetime.date(year=month.year + 1, month=1, day=1)

    def extract_emails(self) -> Iterator[mailbox.Message]:
        """Yield every archived mail whose subject contains 'Buildbot numbers'.

        Scans all files in the working directory whose name starts with
        'llvmdev-'. Mails without a subject are ignored.
        """
        for archive_name in sorted(os.listdir(self._tmpdir)):
            if not archive_name.startswith('llvmdev-'):
                continue
            print('Scanning {}'.format(archive_name))
            mb = mailbox.mbox(os.path.join(self._tmpdir, archive_name), factory=mbox_reader)
            try:
                for mail in mb.values():
                    subject = mail.get('subject')
                    if subject is None:
                        continue
                    # str() because a header may be returned as a Header
                    # object instead of a plain string.
                    if 'Buildbot numbers' in str(subject):
                        yield mail
            finally:
                # Release the file handle held by the mailbox.
                mb.close()

    def get_attachments(self, email: Optional[mailbox.Message]):
        """Download the 'completed_failed_avr_time.csv' linked from `email`.

        The week is taken from the first m/d/Y date in the subject and becomes
        part of the local file name. Mails that do not carry such a date or do
        not link the CSV (e.g. replies) are skipped with a message instead of
        raising.

        Args:
            email: a mail as produced by `extract_emails`; `None` is ignored.
        """
        if email is None:
            return
        subject = str(email['subject'])
        week_match = re.search(r'(\d+/\d+/\d+)', subject)
        if week_match is None:
            print('Skipping mail without a date in the subject: {}'.format(subject))
            return
        try:
            week = datetime.datetime.strptime(week_match.group(1), '%m/%d/%Y').date()
        except ValueError:
            print('Skipping mail with an unparsable date in the subject: {}'.format(subject))
            return

        payload = email.get_payload()
        if not isinstance(payload, str):
            # Multipart mails return a list of parts; nothing to search in.
            print('Skipping mail without a text body: {}'.format(subject))
            return
        attachment_match = re.search(
            r'Name: completed_failed_avr_time.csv[^<]*URL: <([^>]+)>', payload, re.DOTALL)
        if attachment_match is None:
            print('Skipping mail without the expected attachment: {}'.format(subject))
            return

        filename = os.path.join(self._tmpdir, 'buildbot_stats_{}.csv'.format(week.isoformat()))
        self.download(attachment_match.group(1), filename)

    @staticmethod
    def download(url, filename, decompress=False):
        """Download `url` to `filename` unless that file already exists.

        An existing file is never refreshed. A response with an HTTP error
        status is reported and not written, so that an error page does not end
        up in the local cache.

        Args:
            url: address to fetch.
            filename: destination path; missing parent folders are created.
            decompress: if true, gzip-compressed content is stored
                decompressed.
        """
        if os.path.exists(filename):
            return
        print('Getting {}'.format(filename))
        # Without a timeout a stalled connection would block forever.
        r = requests.get(url, timeout=60)
        if not r.ok:
            print('Skipping {}: HTTP status {}'.format(url, r.status_code))
            return
        content = r.content
        if decompress:
            content = LLVMBotArchiveScanner._maybe_gunzip(content)
        os.makedirs(os.path.dirname(filename) or '.', exist_ok=True)
        with open(filename, 'wb') as f:
            f.write(content)

    def merge_results(self):
        """Merge all 'buildbot_stats_*.csv' files into 'buildbot_weekly.csv'.

        Each input file contributes one week (taken from its file name). From
        every row the columns 'name', 'red_builds' and 'all_builds' are read
        and turned into the percentage of red builds. The output has one row
        per week and one column per bot; a cell stays empty when a bot has no
        value for that week. Rows with zero builds are skipped because no
        percentage can be computed for them.
        """
        def _convert_int(s: Optional[str]) -> int:
            # Empty (or missing) cells count as zero.
            s = (s or '').strip()
            if not s:
                return 0
            return int(s)

        bot_stats: Dict[str, Dict[datetime.date, float]] = {}
        weeks: Set[datetime.date] = set()

        for csv_filename in sorted(os.listdir(self._tmpdir)):
            if not csv_filename.startswith('buildbot_stats_'):
                continue
            week_match = re.search(r'(\d+-\d+-\d+)', csv_filename)
            if week_match is None:
                continue
            try:
                week = datetime.date.fromisoformat(week_match.group(1))
            except ValueError:
                continue
            weeks.add(week)
            # newline='' is what the csv module expects for its file objects.
            with open(os.path.join(self._tmpdir, csv_filename), newline='') as csv_file:
                for row in csv.DictReader(csv_file):
                    red_builds = _convert_int(row['red_builds'])
                    all_builds = _convert_int(row['all_builds'])
                    if all_builds == 0:
                        # Avoid a division by zero for bots without builds.
                        continue
                    bot_stats.setdefault(row['name'], {})[week] = 100.0 * red_builds / all_builds

        bots = sorted(bot_stats)
        with open(os.path.join(self._tmpdir, 'buildbot_weekly.csv'), 'w', newline='') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=['week'] + bots)
            writer.writeheader()
            for week in sorted(weeks):
                row = {'week': week.isoformat()}
                # Bots without a value for this week are left out; DictWriter
                # writes an empty cell for them.
                row.update({bot: bot_stats[bot][week] for bot in bots if week in bot_stats[bot]})
                writer.writerow(row)
def mbox_reader(stream):
    """Read a non-ascii message from mailbox.

    Used as the ``factory`` of ``mailbox.mbox``: reads the raw bytes of one
    message from `stream`, decodes them as UTF-8 and parses the text.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of raising
    ``UnicodeDecodeError``, so a single badly encoded mail does not abort the
    scan of a whole archive.

    Based on https://stackoverflow.com/questions/37890123/how-to-trap-an-exception-that-occurs-in-code-underlying-python-for-loop

    Args:
        stream: binary file-like object positioned at the start of a message.

    Returns:
        The parsed ``mailbox.mboxMessage``.
    """
    data = stream.read()
    text = data.decode(encoding='utf-8', errors='replace')
    return mailbox.mboxMessage(text)
if __name__ == '__main__':
    # Script entry point: fetch the archives starting with August 2019, pull
    # the CSV attachment out of every matching mail, then merge everything
    # into one table.
    first_month = datetime.date(year=2019, month=8, day=1)
    archive_scanner = LLVMBotArchiveScanner()
    archive_scanner.get_archives(first_month)
    for buildbot_mail in archive_scanner.extract_emails():
        archive_scanner.get_attachments(buildbot_mail)
    archive_scanner.merge_results()