#!/usr/bin/env python3
"""Mirror Buildbot REST API data into a local Postgres database.

The script fetches workers, builders, builds, buildsets and buildrequests
from the Buildbot master at ``BUILDBOT_URL`` and stores the JSON objects in
``buildbot_*`` tables.  Builds, buildsets and buildrequests are imported
incrementally, starting after the highest id already stored.
"""

import datetime
import json
import logging
import os
from typing import Dict, Iterable, Iterator, List, Optional

import psycopg2
import requests

PHABRICATOR_URL = "https://reviews.llvm.org/api/"
BUILDBOT_URL = "https://lab.llvm.org/buildbot/api/v2/"

# Upper bound (seconds) for a single HTTP request so that an unresponsive
# server makes the script fail instead of hanging forever.
REQUEST_TIMEOUT_SECONDS = 120

# TODO(kuhnel): retry on connection issues, maybe reuse
# https://github.com/google/llvm-premerge-checks/blob/main/scripts/phabtalk/phabtalk.py#L44

# TODO(kuhnel): Import the step data so we can figure out in which step a build fails
# (e.g. compile vs. test)


def _get_json(url: str, params: Optional[Dict] = None) -> Dict:
    """GET ``url`` and return the decoded JSON body.

    Raises:
        Exception: if the server answers with a status code other than 200.
    """
    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT_SECONDS)
    if response.status_code != 200:
        raise Exception(
            "Got status code {} on request to {}".format(response.status_code, url)
        )
    return response.json()


def _fetch_scalar(conn: psycopg2.extensions.connection, query: str, params=None):
    """Run ``query`` and return the first column of the first row, or None."""
    with conn.cursor() as cur:
        cur.execute(query, params)
        row = cur.fetchone()
    return None if row is None else row[0]


def _insert_rows(cur, insert_sql: bytes, row_template: str, rows: Iterable[tuple]) -> bool:
    """Insert all ``rows`` with a single multi-row INSERT statement.

    ``insert_sql`` is the statement up to and including ``values``;
    ``row_template`` is the placeholder tuple for one row.

    Returns:
        False (without touching the database) if ``rows`` is empty, because
        an INSERT without any value tuple is a SQL syntax error; True
        otherwise.
    """
    # cur.mogrify returns a byte string, so we need to join on a byte string
    values = b",".join(cur.mogrify(row_template, row) for row in rows)
    if not values:
        return False
    cur.execute(insert_sql + values)
    return True


def connect_to_db() -> psycopg2.extensions.connection:
    """Connect to the local ``stats`` database.

    The password is read from the ``DB_PASSWORD`` environment variable and
    passed as a keyword argument, so it needs no DSN quoting.
    """
    return psycopg2.connect(
        host="127.0.0.1",
        sslmode="disable",
        dbname="stats",
        user="stats",
        password=os.getenv("DB_PASSWORD"),
    )


def create_tables(conn: psycopg2.extensions.connection):
    """Create all tables and indexes used by this script if they are missing."""
    statements = (
        """CREATE TABLE IF NOT EXISTS buildbot_workers (
            timestamp timestamp NOT NULL,
            worker_id integer NOT NULL,
            data jsonb NOT NULL
            );""",
        """CREATE INDEX IF NOT EXISTS buildbot_worker_ids
            ON buildbot_workers
            (worker_id);""",
        """CREATE INDEX IF NOT EXISTS buildbot_worker_timestamp
            ON buildbot_workers
            (timestamp);""",
        # Note: step_data is not yet populated with data!
        """CREATE TABLE IF NOT EXISTS buildbot_builds (
            build_id integer PRIMARY KEY,
            builder_id integer NOT NULL,
            build_number integer NOT NULL,
            build_data jsonb NOT NULL,
            step_data jsonb
            );""",
        """CREATE TABLE IF NOT EXISTS buildbot_buildsets (
            buildset_id integer PRIMARY KEY,
            data jsonb NOT NULL
            );""",
        """CREATE TABLE IF NOT EXISTS buildbot_buildrequests (
            buildrequest_id integer PRIMARY KEY,
            buildset_id integer NOT NULL,
            data jsonb NOT NULL
            );""",
        """CREATE TABLE IF NOT EXISTS buildbot_builders (
            builder_id integer PRIMARY KEY,
            timestamp timestamp NOT NULL,
            name text NOT NULL,
            data jsonb NOT NULL
            );""",
    )
    with conn.cursor() as cur:
        for statement in statements:
            cur.execute(statement)
    conn.commit()


def get_worker_status(
    worker_id: int, conn: psycopg2.extensions.connection
) -> Optional[Dict]:
    """Return the most recently stored data for a worker, or None if unknown.

    Note: postgres returns a dict for a stored json object.
    """
    return _fetch_scalar(
        conn,
        "SELECT data FROM buildbot_workers WHERE worker_id = %s "
        "ORDER BY timestamp DESC LIMIT 1;",
        [worker_id],
    )


def get_builder_status(
    builder_id: int, conn: psycopg2.extensions.connection
) -> Optional[Dict]:
    """Return the most recently stored data for a builder, or None if unknown.

    Note: postgres returns a dict for a stored json object.
    """
    return _fetch_scalar(
        conn,
        "SELECT data FROM buildbot_builders WHERE builder_id = %s "
        "ORDER BY timestamp DESC LIMIT 1;",
        [builder_id],
    )


def set_worker_status(
    timestamp: datetime.datetime,
    worker_id: int,
    data: str,
    conn: psycopg2.extensions.connection,
):
    """Append a worker status row. The caller is responsible for committing."""
    with conn.cursor() as cur:
        cur.execute(
            """INSERT INTO buildbot_workers (timestamp, worker_id, data)
               values (%s,%s,%s);""",
            (timestamp, worker_id, data),
        )


def set_builder_status(
    timestamp: datetime.datetime,
    builder_id: int,
    name: str,
    data: str,
    conn: psycopg2.extensions.connection,
):
    """Insert or replace the row for a builder. The caller must commit.

    ``builder_id`` is the primary key of ``buildbot_builders``, so a changed
    builder has to overwrite its existing row instead of adding a new one.
    """
    with conn.cursor() as cur:
        cur.execute(
            """INSERT INTO buildbot_builders (builder_id, timestamp, name, data)
               values (%s,%s,%s,%s)
               ON CONFLICT (builder_id) DO UPDATE
               SET timestamp = EXCLUDED.timestamp,
                   name = EXCLUDED.name,
                   data = EXCLUDED.data;""",
            (builder_id, timestamp, name, data),
        )


def update_workers(conn: psycopg2.extensions.connection):
    """Fetch all workers and store those whose data changed since last run."""
    logging.info("Updating worker status...")
    workers = _get_json(BUILDBOT_URL + "workers")["workers"]
    timestamp = datetime.datetime.now()
    for worker in workers:
        worker_id = worker["workerid"]
        # TODO: It would be faster if request all worker info and cache it
        # locally
        old_data = get_worker_status(worker_id, conn)
        # only update worker information if it has changed as this data is quite
        # static
        if old_data is None or worker != old_data:
            set_worker_status(timestamp, worker_id, json.dumps(worker), conn)
    conn.commit()


def update_builders(conn: psycopg2.extensions.connection):
    """Fetch all builders and store those whose data changed since last run."""
    logging.info("Updating builder status...")
    builders = _get_json(BUILDBOT_URL + "builders")["builders"]
    timestamp = datetime.datetime.now()
    for builder in builders:
        builder_id = builder["builderid"]
        # TODO: It would be faster if request all builder info and cache it
        # locally
        old_data = get_builder_status(builder_id, conn)
        # only update builder information if it has changed as this data is
        # quite static
        if old_data is None or builder != old_data:
            # NOTE(review): assumes every builder object has a "name" field,
            # which the NOT NULL column of buildbot_builders requires.
            set_builder_status(
                timestamp, builder_id, builder["name"], json.dumps(builder), conn
            )
    conn.commit()


def get_last_build(conn: psycopg2.extensions.connection) -> int:
    """Get the highest stored build id, or 0 if no build is stored yet.

    This is used to only get new builds.
    """
    max_id = _fetch_scalar(conn, "SELECT MAX(build_id) FROM buildbot_builds")
    return 0 if max_id is None else max_id


def update_build_status(conn: psycopg2.extensions.connection):
    """Import completed builds with an id above the highest stored one.

    Each batch is committed on its own.  The import stops at the first batch
    that contains no completed build.

    NOTE(review): builds that are not yet complete are skipped; as the next
    run resumes after MAX(build_id), a skipped build with a lower id than an
    imported one is not picked up later - confirm this is acceptable.
    """
    start_id = get_last_build(conn)
    logging.info("Updating build results, starting with %s...", start_id)
    url = BUILDBOT_URL + "builds"
    with conn.cursor() as cur:
        for result_set in rest_request_iterator(
            url, "builds", "buildid", start_id=start_id
        ):
            rows = (
                (
                    build["buildid"],
                    build["builderid"],
                    build["number"],
                    json.dumps(build, sort_keys=True),
                )
                for build in result_set
                if build["complete"]
            )
            inserted = _insert_rows(
                cur,
                b"INSERT INTO buildbot_builds (build_id, builder_id, build_number, build_data) values ",
                " (%s,%s,%s,%s) ",
                rows,
            )
            if not inserted:
                break
            logging.info("last build id: %s", result_set[-1]["buildid"])
            conn.commit()


def rest_request_iterator(
    url: str,
    array_field_name: str,
    id_field_name: str,
    start_id: int = 0,
    step: int = 1000,
) -> Iterator[List[Dict]]:
    """Request paginated data from the buildbot master.

    This returns a generator. Each call to it gives you a shard of results
    whose ids lie in a window of ``step`` consecutive ids
    (``start_id < id <= start_id + step``).
    This can be used to do a mass-SQL insert of data.

    Limiting the range of the returned IDs causes Buildbot to sort the data.
    This makes incremental imports much easier.

    Iteration ends at the first window that returns no results.

    Raises:
        Exception: if a request returns a status code other than 200.
    """
    while True:
        stop_id = start_id + step
        payload = _get_json(
            url,
            {
                f"{id_field_name}__gt": start_id,
                f"{id_field_name}__le": stop_id,
            },
        )
        results = payload[array_field_name]
        if not results:
            return
        yield results
        start_id = stop_id


def get_latest_buildset(conn: psycopg2.extensions.connection) -> int:
    """Get the maximum buildset id, or 0 if no buildset is stored yet.

    This is useful for incremental updates.
    """
    max_id = _fetch_scalar(conn, "SELECT MAX(buildset_id) from buildbot_buildsets;")
    return 0 if max_id is None else max_id


def update_buildsets(conn: psycopg2.extensions.connection):
    """Import completed buildsets with an id above the highest stored one.

    Each batch is committed on its own.  The import stops at the first batch
    that contains no completed buildset.
    """
    start_id = get_latest_buildset(conn)
    logging.info("Getting buildsets, starting with %s...", start_id)
    url = BUILDBOT_URL + "buildsets"
    with conn.cursor() as cur:
        for result_set in rest_request_iterator(
            url, "buildsets", "bsid", start_id=start_id
        ):
            rows = (
                (buildset["bsid"], json.dumps(buildset, sort_keys=True))
                for buildset in result_set
                if buildset["complete"]
            )
            inserted = _insert_rows(
                cur,
                b"INSERT INTO buildbot_buildsets (buildset_id, data) values ",
                " (%s,%s) ",
                rows,
            )
            if not inserted:
                break
            logging.info("last id %s", result_set[-1]["bsid"])
            conn.commit()


def get_latest_buildrequest(conn: psycopg2.extensions.connection) -> int:
    """Get the maximum buildrequest id, or 0 if none is stored yet."""
    max_id = _fetch_scalar(
        conn, "SELECT MAX(buildrequest_id) from buildbot_buildrequests;"
    )
    return 0 if max_id is None else max_id


def update_buildrequests(conn: psycopg2.extensions.connection):
    """Import completed buildrequests with an id above the highest stored one.

    Each batch is committed on its own.  The import stops at the first batch
    that contains no completed buildrequest.
    """
    start_id = get_latest_buildrequest(conn)
    logging.info("Getting buildrequests, starting with %s...", start_id)
    url = BUILDBOT_URL + "buildrequests"
    with conn.cursor() as cur:
        for result_set in rest_request_iterator(
            url, "buildrequests", "buildrequestid", start_id=start_id
        ):
            rows = (
                (
                    buildrequest["buildrequestid"],
                    buildrequest["buildsetid"],
                    json.dumps(buildrequest),
                )
                for buildrequest in result_set
                if buildrequest["complete"]
            )
            inserted = _insert_rows(
                cur,
                b"INSERT INTO buildbot_buildrequests (buildrequest_id, buildset_id, data) values ",
                " (%s,%s,%s) ",
                rows,
            )
            if not inserted:
                break
            logging.info("%s", result_set[-1]["buildrequestid"])
            conn.commit()


def main():
    """Create the schema if needed and run all importers once."""
    logging.basicConfig(level='INFO', format='%(levelname)-7s %(message)s')
    conn = connect_to_db()
    try:
        create_tables(conn)
        update_workers(conn)
        update_builders(conn)
        update_build_status(conn)
        update_buildsets(conn)
        update_buildrequests(conn)
    finally:
        # Release the database connection even if an importer fails.
        conn.close()


if __name__ == "__main__":
    main()