#!/usr/bin/env python3
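"""Collect Jenkins build statistics and write them to CSV files.

Credentials are read from ~/.llvm-premerge-checks/jenkins-creds.json. Raw API
responses are cached under tmp/jenkins so repeated runs do not re-download
everything; the resulting statistics are written to tmp/*.csv.
"""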
import csv
import datetime
import hashlib
import json
import os
import re
import sys
from typing import Dict, List, Optional
from urllib.parse import urljoin

import numpy
import requests


class Stage:
    """A single stage of a Jenkins pipeline run, parsed from the wfapi data."""

    def __init__(self, stage_dict: Dict):
        self.name = stage_dict['name']
        self.success = stage_dict['status'].lower() == 'success'
        self.start_time = datetime.datetime.fromtimestamp(stage_dict['startTimeMillis'] / 1000)
        self.duration = datetime.timedelta(milliseconds=stage_dict['durationMillis'])


class Build:
    """A single build of a Jenkins job, parsed from the JSON API data."""

    def __init__(self, job_name: str, build_dict: Dict):
        self.job_name = job_name
        self.number = build_dict['number']
        self.result = build_dict['result']
        self.start_time = datetime.datetime.fromtimestamp(build_dict['timestamp'] / 1000)
        self.duration = datetime.timedelta(milliseconds=build_dict['duration'])
        self.stages = []  # type: List[Stage]
        self.agent = None  # type: Optional[str]

    @property
    def hour(self) -> datetime.datetime:
        """Start time, truncated to the hour (used for grouping)."""
        return datetime.datetime(
            year=self.start_time.year,
            month=self.start_time.month,
            day=self.start_time.day,
            hour=self.start_time.hour,
        )

    @property
    def day(self) -> datetime.datetime:
        """Start time, truncated to the day (used for grouping)."""
        return datetime.datetime(
            year=self.start_time.year,
            month=self.start_time.month,
            day=self.start_time.day,
        )

    @property
    def success(self) -> bool:
        # `result` is None while a build is still running; treat that as failure.
        if self.result is None:
            return False
        return self.result.lower() == 'success'

    def update_from_wfdata(self, wfdata: Dict):
        self.stages = [Stage(s) for s in wfdata['stages']]


class JenkinsStatsReader:
    """Fetches builds from a Jenkins server and writes statistics to CSV files."""

    _TMP_DIR = 'tmp/jenkins'

    def __init__(self):
        self.username = None  # type: Optional[str]
        self.password = None  # type: Optional[str]
        self.jenkins_url = None  # type: Optional[str]
        self.jobs = []  # type: List[str]
        self.builds = {}  # type: Dict[str, List[Build]]
        self._read_config()
        self._session = requests.session()
        self._session.auth = (self.username, self.password)

    def _read_config(self, credential_path='~/.llvm-premerge-checks/jenkins-creds.json'):
        with open(os.path.expanduser(credential_path)) as credential_file:
            config = json.load(credential_file)
        self.username = config['username']
        self.password = config['password']
        self.jenkins_url = config['jenkins_url']
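
    # A sketch of the expected credentials file (keys as read above; the
    # values here are placeholders, not real settings):
    #
    # {
    #     "username": "jenkins-user",
    #     "password": "api-token",
    #     "jenkins_url": "https://jenkins.example.org/"
    # }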

    @property
    def job_names(self) -> List[str]:
        # Wrap in list() so the return value matches the annotation
        # (dict.keys() returns a view, not a list).
        return list(self.builds.keys())

    def get_data(self):
        jobnames = self.fetch_jobsnames()
        print('Found {} jobs: {}'.format(len(jobnames), jobnames))
        self.get_builds(jobnames)
        # self.get_workflow_data()
        self.get_build_agents()
        self.create_statistics('hour')
        self.create_statistics('day')
        self.write_all_builds()

    def cached_get(self, url, as_json: bool = True):
        # Responses are cached on disk, keyed by the SHA-256 hash of the URL.
        m = hashlib.sha256()
        m.update(url.encode('utf-8'))
        filename = m.digest().hex()
        cache_file = os.path.join(self._TMP_DIR, filename)
        if os.path.isfile(cache_file):
            with open(cache_file, 'r') as json_file:
                if as_json:
                    return json.load(json_file)
                return json_file.read()

        response = self._session.get(urljoin(self.jenkins_url, url))
        if response.status_code != 200:
            if response.status_code == 404:
                # e.g. console logs that have already been rotated away
                return None
            raise IOError('Could not read data from {}:\n{}'.format(url, response.text))
        os.makedirs(self._TMP_DIR, exist_ok=True)
        with open(cache_file, 'w') as jenkins_data_file:
            jenkins_data_file.write(response.text)
        if as_json:
            return response.json()
        return response.text
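
    # Note: cache entries never expire. Delete tmp/jenkins to force a re-fetch,
    # e.g. to pick up builds that finished after the last run.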

    def fetch_jobsnames(self) -> List[str]:
        data = self.cached_get('api/json?tree=jobs[name]')
        return [job['name'] for job in data['jobs']]

    def get_builds(self, job_names):
        for job_name in job_names:
            print('Getting builds for: {}'.format(job_name))
            build_data = self.cached_get(
                'job/{}/api/json?tree=allBuilds[number,result,duration,timestamp,executor]'.format(job_name))
            self.builds[job_name] = [Build(job_name, b) for b in build_data['allBuilds']]
            print('{} has {} builds'.format(job_name, len(self.builds[job_name])))

    def get_workflow_data(self):
        print('Getting workflow data...')
        for job_name, builds in self.builds.items():
            for i, build in enumerate(builds):
                wfdata = self.cached_get('job/{}/{}/wfapi/'.format(job_name, build.number))
                build.update_from_wfdata(wfdata)
                # i + 1 so the progress counter ends at len(builds), not one short.
                sys.stdout.write('\r{} [{}/{}]'.format(job_name, i + 1, len(builds)))
                sys.stdout.flush()

    def get_build_agents(self):
        print('Getting agent names...')
        for job_name, builds in self.builds.items():
            for i, build in enumerate(builds):
                console_log = self.cached_get('job/{}/{}/consoleText'.format(job_name, build.number),
                                              as_json=False)
                if console_log is None:
                    # Log no longer available (404); skip this build.
                    continue
                # Jenkins console logs contain a line like "Running on <agent> in <workspace>".
                match = re.search(r'Running on ([\w-]+) in', console_log)
                if match:
                    build.agent = match.group(1)
                sys.stdout.write('\r{} [{}/{}]'.format(job_name, i + 1, len(builds)))
                sys.stdout.flush()

    def create_statistics(self, group_by: str):
        for job_name, builds in self.builds.items():
            print('Writing data for {}'.format(job_name))
            # TODO: add success/failure rates
            fieldnames = ['date', '# builds', 'median duration', 'p90 duration', 'p95 duration', 'max duration']
            with open('tmp/jenkins_{}_{}.csv'.format(job_name, group_by), 'w') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel)
                writer.writeheader()
                # Bucket the builds by their `hour` or `day` property.
                build_hist = {}
                for build in builds:
                    build_hist.setdefault(getattr(build, group_by), []).append(build)

                for key in sorted(build_hist.keys()):
                    bucket = build_hist[key]  # type: List[Build]
                    durations = numpy.array([b.duration.total_seconds() for b in bucket])
                    # All durations are reported in minutes.
                    writer.writerow({
                        'date': key,
                        '# builds': len(bucket),
                        'median duration': numpy.median(durations) / 60,
                        'p90 duration': numpy.percentile(durations, 90) / 60,
                        'p95 duration': numpy.percentile(durations, 95) / 60,
                        'max duration': numpy.max(durations) / 60,
                    })

    def write_all_builds(self):
        fieldnames = ['date', 'job_name', 'build_number', 'duration', 'agent', 'success']
        with open('tmp/jenkins_all_builds.csv', 'w') as csv_file:
            writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel)
            writer.writeheader()
            for job_name, builds in self.builds.items():
                for build in builds:
                    writer.writerow({
                        'date': build.start_time,
                        'job_name': job_name,
                        'build_number': build.number,
                        'duration': build.duration.total_seconds() / 60.0,
                        'agent': build.agent,
                        'success': build.success,
                    })
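

# Run directly: downloads the data (with on-disk caching) and writes all CSV
# files into tmp/. Assumes the credentials file described in _read_config exists.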
if __name__ == '__main__':
    jsr = JenkinsStatsReader()
    jsr.get_data()