
fixed import speed for buildsets

Christian Kühnel 2021-05-11 13:09:12 +02:00 committed by Mikhail Goncharov
parent 9aefaf40cc
commit b6d3986660


@@ -13,9 +13,6 @@ BUILDBOT_URL = "https://lab.llvm.org/buildbot/api/v2/"
 # TODO(kuhnel): retry on connection issues, maybe resuse
 # https://github.com/google/llvm-premerge-checks/blob/main/scripts/phabtalk/phabtalk.py#L44
-
-# TODO(kuhnel): get "buildsets" and "buildrequests" so that we can get the
-# commit hash for each build
 
 
 def connect_to_db() -> psycopg2.extensions.connection:
     """Connect to the database, create tables as needed."""
@@ -115,8 +112,7 @@ def update_worker_status(conn: psycopg2.extensions.connection):
 
 
 def get_builders() -> List[int]:
     """get list of all builder ids."""
-    # TODO(kuhnel): do we also want to store the builder information?
-    # Does it contain useful information?
+    # TODO(kuhnel): store this data as we want to get the builder names
     response = requests.get(BUILDBOT_URL + "builders")
     return [builder["builderid"] for builder in response.json()["builders"]]
@@ -151,7 +147,7 @@ def add_build(builder: int, number: int, build: str, steps: str, conn):
     )
 
 
-def update_build_status(conn):
+def update_build_status(conn: psycopg2.extensions.connection):
     print("Updating build results...")
     # import only builds we have not yet stores in the database
     for builder in get_builders():
@@ -175,51 +171,95 @@ def update_build_status(conn):
     conn.commit()
 
 
-def rest_request_iterator(url: str, field_name: str, limit: int = 1000):
+def rest_request_iterator(
+    url: str,
+    array_field_name: str,
+    id_field_name: str,
+    start_id: int = 0,
+    step: int = 1000,
+):
     """Request paginated data from the buildbot master.
 
     This returns a generator. Each call to it gives you shards of
     <=limit results. This can be used to do a mass-SQL insert of data.
+    Limiting the range of the returned IDs causes Buildbot to sort the data.
+    This makes incremental imports much easier.
     """
-    offset = 0
     while True:
         count = 0
-        response = requests.get(url + "?offset={}&limit={}".format(offset, limit))
+        stop_id = start_id + step
+        response = requests.get(
+            url
+            + "?{id_field_name}__gt={start_id}&{id_field_name}__le={stop_id}&".format(
+                **locals()
+            )
+        )
         if response.status_code != 200:
             raise Exception(
                 "Got status code {} on request to {}".format(response.status_code, url)
             )
-        results = response.json()[field_name]
+        results = response.json()[array_field_name]
         yield results
-        if len(results) < limit:
+        if len(results) < step:
             return
-        offset += limit
+        start_id = stop_id
 
 
-def get_buildsets(conn):
-    print("Getting buildsets...")
+def get_latest_buildset(conn: psycopg2.extensions.connection) -> int:
+    """Get the maximumg buildset id.
+    This is useful for incremental updates."""
+    cur = conn.cursor()
+    cur.execute("SELECT MAX(buildset_id) from buildbot_buildsets;")
+    row = cur.fetchone()
+    if row[0] is None:
+        return 0
+    return row[0]
+
+
+def get_buildsets(conn: psycopg2.extensions.connection):
+    start_id = get_latest_buildset(conn)
+    print("Getting buildsets, starting with {}...".format(start_id))
     url = BUILDBOT_URL + "buildsets"
     cur = conn.cursor()
-    count = 0
-    for buildset in rest_request_iterator(url, "buildsets"):
-        # TODO(kuhnel): implement incremental update
-        cur.execute(
-            "INSERT INTO buildbot_buildsets (buildset_id, data) values (%s,%s);",
-            (buildset["bsid"], json.dumps(buildset, sort_keys=True)),
-        )
-        count += 1
-        if count % 100 == 0:
-            print(" {}".format(count))
-            conn.commit()
+    for result_set in rest_request_iterator(
+        url, "buildsets", "bsid", start_id=start_id
+    ):
+        args_str = b",".join(
+            cur.mogrify(
+                b" (%s,%s) ",
+                (buildset["bsid"], json.dumps(buildset, sort_keys=True)),
+            )
+            for buildset in result_set
+            if buildset["complete"]
+        )
+        cur.execute(
+            b"INSERT INTO buildbot_buildsets (buildset_id, data) values " + args_str
+        )
+        print(" {}".format(result_set[-1]["bsid"]))
+        conn.commit()
     conn.commit()
 
 
-def get_buildrequests(conn):
-    print("Getting buildrequests...")
+def get_latest_buildrequest(conn: psycopg2.extensions.connection) -> int:
+    cur = conn.cursor()
+    cur.execute("SELECT MAX(buildrequest_id) from buildbot_buildrequests;")
+    row = cur.fetchone()
+    if row[0] is None:
+        return 0
+    return row[0]
+
+
+def get_buildrequests(conn: psycopg2.extensions.connection):
+    start_id = get_latest_buildrequest(conn)
+    print("Getting buildrequests, starting with {}...".format(start_id))
     url = BUILDBOT_URL + "buildrequests"
     cur = conn.cursor()
-    # TODO(kuhnel): implement incremental update
-    for result_set in rest_request_iterator(url, "buildrequests"):
+    for result_set in rest_request_iterator(
+        url, "buildrequests", "buildrequestid", start_id=start_id
+    ):
         # cur.mogrify returns byte string, so we need to join on a byte string
         args_str = b",".join(
             cur.mogrify(
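In the hunk above, get_buildsets now writes each shard returned by rest_request_iterator with a single multi-row INSERT assembled via cursor.mogrify and commits once per shard, instead of one execute per buildset and a commit every 100 rows. Below is a minimal stand-alone sketch of that batching pattern, assuming psycopg2 as in the script; the table demo_records, its columns, and insert_shard are illustrative names for the sketch, not part of this file:

import json

import psycopg2  # assumed to be installed, as in the script above


def insert_shard(conn: psycopg2.extensions.connection, rows):
    """Write one shard of API results with a single multi-row INSERT.

    rows is a list of dicts that each carry an integer "id" key; the table
    and column names below are placeholders for this sketch only.
    """
    if not rows:
        return  # an empty VALUES list would be a SQL syntax error
    cur = conn.cursor()
    # mogrify() returns bytes, so the fragments are joined on a byte string,
    # mirroring the args_str construction in the commit.
    values = b",".join(
        cur.mogrify(b"(%s,%s)", (row["id"], json.dumps(row, sort_keys=True)))
        for row in rows
    )
    cur.execute(b"INSERT INTO demo_records (record_id, data) VALUES " + values)
    conn.commit()  # one commit per shard keeps progress durable

With the default step of 1000, this turns roughly a thousand execute/commit round trips into one INSERT and one commit per shard.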
@@ -231,15 +271,14 @@ def get_buildrequests(conn):
                 ),
             )
             for buildrequest in result_set
+            if buildrequest["complete"]
         )
         cur.execute(
             b"INSERT INTO buildbot_buildrequests (buildrequest_id, buildset_id, data) values "
             + args_str
         )
+        print(" {}".format(result_set[-1]["buildrequestid"]))
         conn.commit()
-        # TODO: remove after profiling
-        break
-    print("!")
 
 
 def buildbot_monitoring():
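The other half of the change is incremental import: both importers now resume from the largest id already stored (SELECT MAX(...)) and only ask Buildbot for newer records, using the REST API's __gt/__le field filters that the new rest_request_iterator builds into the URL. Each request it issues expands to something like the following (the ids are made up):

    https://lab.llvm.org/buildbot/api/v2/buildsets?bsid__gt=42000&bsid__le=43000&

A condensed sketch of that resume-and-fetch step for a single window, assuming a psycopg2 connection and the buildbot_buildsets table created by this script; resume_point and fetch_new_buildsets are illustrative helper names, not functions from this file:

import psycopg2  # connection setup omitted; see connect_to_db() above
import requests

BUILDBOT_URL = "https://lab.llvm.org/buildbot/api/v2/"


def resume_point(conn: psycopg2.extensions.connection) -> int:
    """Largest buildset id already imported, or 0 for an empty table."""
    cur = conn.cursor()
    cur.execute("SELECT MAX(buildset_id) FROM buildbot_buildsets;")
    row = cur.fetchone()
    return row[0] or 0


def fetch_new_buildsets(conn: psycopg2.extensions.connection, step: int = 1000):
    """Fetch one window of buildsets newer than what is already stored."""
    start_id = resume_point(conn)
    stop_id = start_id + step
    # Bounding bsid keeps each response small and sorted, which is what makes
    # repeated incremental runs cheap compared to offset/limit paging.
    response = requests.get(
        BUILDBOT_URL
        + "buildsets?bsid__gt={}&bsid__le={}".format(start_id, stop_id)
    )
    response.raise_for_status()
    return response.json()["buildsets"]

On a database that is already up to date, the first window comes back nearly empty, so a rerun of the importer finishes after a handful of requests instead of re-downloading everything.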
@@ -254,7 +293,4 @@ def buildbot_monitoring():
 
 
 if __name__ == "__main__":
-    import cProfile
-
-    # cProfile.run("buildbot_monitoring()")
     buildbot_monitoring()
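The last hunk drops the cProfile leftovers that were only needed while measuring the old implementation. If the import ever needs profiling again, the same measurement can be reproduced ad hoc without re-adding code to the script; a sketch, where the module name and output file are illustrative:

import cProfile
import pstats

# Assumes buildbot_monitoring is available in the current (e.g. REPL) namespace,
# for instance after running: from the_script import buildbot_monitoring
cProfile.run("buildbot_monitoring()", "import_profile.out")
pstats.Stats("import_profile.out").sort_stats("cumulative").print_stats(20)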