#!/usr/bin/env python3 # Copyright 2019 Google LLC # # Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # https://llvm.org/LICENSE.txt # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import csv import datetime import gzip import os import mailbox import requests import re from typing import List, Dict, Set EMAIL_ARCHIVE_URL = 'http://lists.llvm.org/pipermail/llvm-dev/{year}-{month}.txt.gz' TMP_DIR = os.path.join(os.path.dirname(__file__), 'tmp') class LLVMBotArchiveScanner: def __init__(self): self._tmpdir = TMP_DIR @staticmethod def _generate_archive_url(month: datetime.date) -> str: return EMAIL_ARCHIVE_URL.format(year=month.year, month=month.strftime('%B')) def _download_archive(self, month: datetime.date): os.makedirs(self._tmpdir, exist_ok=True) filename = os.path.join(self._tmpdir, 'llvmdev-{year}-{month:02d}.txt'.format(year=month.year, month=month.month)) url = self._generate_archive_url(month) # FIXME: decompress the files self.download(url, filename) def get_archives(self, start_month: datetime.date): print('Downloading data...') month = start_month today = datetime.date.today() while month < today: self._download_archive(month) if month.month < 12: month = datetime.date(year=month.year, month=month.month+1, day=1) else: month = datetime.date(year=month.year+1, month=1, day=1) def extract_emails(self) -> List[mailbox.Message]: result = [] for archive_name in (d for d in os.listdir(self._tmpdir) if d.startswith('llvmdev-')): print('Scanning {}'.format(archive_name)) mb = mailbox.mbox(os.path.join(self._tmpdir, archive_name), factory=mbox_reader) for mail in mb.values(): subject = mail.get('subject') if subject is None: continue if 'Buildbot numbers' in mail['subject']: yield(mail) yield def get_attachments(self, email: mailbox.Message): if email is None: return week_str = re.search(r'(\d+/\d+/\d+)', email['subject']).group(1) week = datetime.datetime.strptime(week_str, '%m/%d/%Y').date() attachment_url = re.search(r'Name: completed_failed_avr_time.csv[^<]*URL: <([^>]+)>', email.get_payload(), re.DOTALL).group(1) filename = os.path.join(self._tmpdir, 'buildbot_stats_{}.csv'.format(week.isoformat())) self.download(attachment_url, filename) @staticmethod def download(url, filename): if os.path.exists(filename): return r = requests.get(url) print('Getting {}'.format(filename)) with open(filename, 'wb') as f: f.write(r.content) def merge_results(self): def _convert_int(s: str) -> int: if len(s) == 0: return 0 return int(s) bot_stats = {} # type: Dict[str, Dict[datetime.date, float]] weeks = set() # type: Set[datetime.date] for csv_filename in (d for d in os.listdir(self._tmpdir) if d.startswith('buildbot_stats_')): week_str = re.search(r'(\d+-\d+-\d+)', csv_filename).group(1) week = datetime.datetime.fromisoformat(week_str).date() weeks.add(week) with open(os.path.join(self._tmpdir, csv_filename)) as csv_file: reader = csv.DictReader(csv_file) for row in reader: name = row['name'] red_build = _convert_int(row['red_builds']) all_builds = _convert_int(row['all_builds']) percentage = 100.0 * red_build / all_builds bot_stats.setdefault(name, {}) bot_stats[name][week] = percentage with open(os.path.join(self._tmpdir, 'buildbot_weekly.csv'), 'w') as csv_file: fieldnames = ['week'] filtered_bots = sorted(b for b in bot_stats.keys()) # if len(bot_stats[b]) == len(weeks) fieldnames.extend(filtered_bots) writer = csv.DictWriter(csv_file, fieldnames=fieldnames) writer.writeheader() for week in sorted(weeks): row = {'week': week.isoformat()} for bot in filtered_bots: percentage = bot_stats[bot].get(week) if percentage is None: continue row[bot] = percentage writer.writerow(row) def mbox_reader(stream): """Read a non-ascii message from mailbox. Based on https://stackoverflow.com/questions/37890123/how-to-trap-an-exception-that-occurs-in-code-underlying-python-for-loop """ data = stream.read() text = data.decode(encoding="utf-8") return mailbox.mboxMessage(text) if __name__ == '__main__': scanner = LLVMBotArchiveScanner() scanner.get_archives(datetime.date(year=2019, month=8, day=1)) for message in scanner.extract_emails(): scanner.get_attachments(message) scanner.merge_results()