diff --git a/scripts/metrics/jenkins.py b/scripts/metrics/jenkins.py index e55cdc8..7f54818 100644 --- a/scripts/metrics/jenkins.py +++ b/scripts/metrics/jenkins.py @@ -1,13 +1,25 @@ #!/usr/bin/env python3 -import requests -from typing import Optional, List, Dict -import json -import os -from urllib.parse import urljoin -import datetime -import numpy import csv +import datetime +import hashlib +import json +import numpy +import requests +import os +import sys +from typing import Optional, List, Dict +from urllib.parse import urljoin + + +class Stage: + + def __init__(self, stage_dict: Dict): + self.name = stage_dict['name'] + self.success = stage_dict['status'].lower() == 'success' + self.start_time = datetime.datetime.fromtimestamp(stage_dict['startTimeMillis']/1000) + self.duration = datetime.timedelta(milliseconds=stage_dict['durationMillis']) + class Build: @@ -17,6 +29,7 @@ class Build: self.result = build_dict['result'] self.start_time = datetime.datetime.fromtimestamp(build_dict['timestamp']/1000) self.duration = datetime.timedelta(milliseconds=build_dict['duration']) + self.stages = [] # type: List[Stage] @property def hour(self) -> datetime.datetime: @@ -35,10 +48,12 @@ class Build: day=self.start_time.day, ) + def update_from_wfdata(self, wfdata: Dict): + self.stages = [Stage(s) for s in wfdata['stages']] + class JenkinsStatsReader: - - _JENKINS_DAT_FILE = 'tmp/jenkins.json' + _TMP_DIR = 'tmp/jenkins' def __init__(self): self.username = None # type: Optional[str] @@ -62,28 +77,51 @@ class JenkinsStatsReader: return self.builds.keys() def get_data(self): - if not os.path.isfile(self._JENKINS_DAT_FILE): - self.fetch_data() - self.parse_data() + jobnames = self.fetch_jobsnames() + print('Found {} jobs: {}'.format(len(jobnames), jobnames)) + self.get_builds(jobnames) + self.get_workflow_data() self.create_statistics('hour') self.create_statistics('day') - def fetch_data(self): - response = self._session.get( - urljoin(self.jenkins_url, 'api/json?tree=jobs[name,url,allBuilds[number,result,duration,url,timestamp]]')) - with open(self._JENKINS_DAT_FILE, 'w') as jenkins_data_file: - json.dump(response.json(), jenkins_data_file) + def cached_get(self, url) -> Dict: + m = hashlib.sha256() + m.update(url.encode('utf-8')) + filename = m.digest().hex() + cache_file = os.path.join(self._TMP_DIR, filename) + if os.path.isfile(cache_file): + with open(cache_file, 'r') as json_file: + data = json.load(json_file) + return data - def parse_data(self): - with open(self._JENKINS_DAT_FILE) as jenkins_data_file: - build_data = json.load(jenkins_data_file) - for job in build_data['jobs']: - job_name = job['name'] - self.builds[job_name] = [Build(job_name, b) for b in job['allBuilds']] + response = self._session.get(urljoin(self.jenkins_url, url)) + if response.status_code != 200: + raise IOError('Could not read data from {}:\n{}'.format(url, response.text)) + os.makedirs(self._TMP_DIR, exist_ok=True) + with open(cache_file, 'w') as jenkins_data_file: + jenkins_data_file.write(response.text) + return response.json() + + def fetch_jobsnames(self) -> List[str]: + data = self.cached_get('api/json?tree=jobs[name]') + return [job['name'] for job in data['jobs']] + + def get_builds(self, job_names): + for job_name in job_names: + print('Gettings builds for: {}'.format(job_name)) + build_data = self.cached_get('job/{}/api/json?tree=allBuilds[number,result,duration,timestamp,executor]'.format(job_name)) + self.builds[job_name] = [Build(job_name, b) for b in build_data['allBuilds']] print('{} has {} builds'.format(job_name, len(self.builds[job_name]))) + def get_workflow_data(self): + for job_name, builds in self.builds.items(): + for i, build in enumerate(builds): + wfdata = self.cached_get('job/{}/{}/wfapi/'.format(job_name, build.number)) + build.update_from_wfdata(wfdata) + sys.stdout.write('\r{} [{}/{}]'.format(job_name, i, len(builds))) + sys.stdout.flush() + def create_statistics(self, group_by: str): - # only look at Phab for job_name, builds in self.builds.items(): print('Writing data for {}'.format(job_name)) # TODO: add success/failure rates