From ace53cf13b0999504a01b81317c5c84cbab04671 Mon Sep 17 00:00:00 2001 From: Mikhail Goncharov Date: Mon, 29 Jan 2024 14:59:11 +0100 Subject: [PATCH] cleanup --- phabricator-proxy/Dockerfile | 7 - phabricator-proxy/README.md | 4 - phabricator-proxy/build_deploy.sh | 34 --- phabricator-proxy/main.py | 72 ------- scripts/metrics/jenkins.py | 196 ------------------ scripts/metrics/load_buildkite.py | 18 +- terraform/.gitignore | 37 ---- terraform/README.md | 94 --------- terraform/backend.tf | 4 - terraform/billing.tf | 49 ----- terraform/cloudbuild.yaml | 57 ----- terraform/cluster.tf | 179 ---------------- terraform/kubernetes/linux-agents.yaml | 82 -------- terraform/kubernetes/namespace.yaml | 4 - .../secret-buildkite-token-readonly.yaml | 8 - .../kubernetes/secret-buildkite-token.yaml | 8 - .../kubernetes/secret-conduit-token.yaml | 8 - terraform/kubernetes/secret-github-ssh.yaml | 10 - terraform/kubernetes/windows-agents.yaml | 79 ------- terraform/main.tf | 92 -------- terraform/provider.tf | 7 - terraform/secrets.tf | 26 --- terraform/variables.tf | 117 ----------- terraform/variables.tfvars_example | 21 -- 24 files changed, 9 insertions(+), 1204 deletions(-) delete mode 100644 phabricator-proxy/Dockerfile delete mode 100644 phabricator-proxy/README.md delete mode 100755 phabricator-proxy/build_deploy.sh delete mode 100644 phabricator-proxy/main.py delete mode 100644 scripts/metrics/jenkins.py delete mode 100644 terraform/.gitignore delete mode 100644 terraform/README.md delete mode 100644 terraform/backend.tf delete mode 100644 terraform/billing.tf delete mode 100644 terraform/cloudbuild.yaml delete mode 100644 terraform/cluster.tf delete mode 100644 terraform/kubernetes/linux-agents.yaml delete mode 100644 terraform/kubernetes/namespace.yaml delete mode 100644 terraform/kubernetes/secret-buildkite-token-readonly.yaml delete mode 100644 terraform/kubernetes/secret-buildkite-token.yaml delete mode 100644 terraform/kubernetes/secret-conduit-token.yaml delete mode 100644 terraform/kubernetes/secret-github-ssh.yaml delete mode 100644 terraform/kubernetes/windows-agents.yaml delete mode 100644 terraform/main.tf delete mode 100644 terraform/provider.tf delete mode 100644 terraform/secrets.tf delete mode 100644 terraform/variables.tf delete mode 100644 terraform/variables.tfvars_example diff --git a/phabricator-proxy/Dockerfile b/phabricator-proxy/Dockerfile deleted file mode 100644 index ea87af2..0000000 --- a/phabricator-proxy/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM python:3 - -RUN pip install flask gunicorn requests - -ADD main.py / - -CMD ["gunicorn", "--bind", "0.0.0.0:8080", "main:app"] \ No newline at end of file diff --git a/phabricator-proxy/README.md b/phabricator-proxy/README.md deleted file mode 100644 index 21367ad..0000000 --- a/phabricator-proxy/README.md +++ /dev/null @@ -1,4 +0,0 @@ -This is a small service to integrate Harbormaster and buildkite. - -Located at http://build.llvm-merge-guard.org behind http auth and is not -publicly accessible as it's only used from Harbormaster. \ No newline at end of file diff --git a/phabricator-proxy/build_deploy.sh b/phabricator-proxy/build_deploy.sh deleted file mode 100755 index 8d8cd62..0000000 --- a/phabricator-proxy/build_deploy.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/bash -# Copyright 2019 Google LLC -# -# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://llvm.org/LICENSE.txt -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -eux - -DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" -ROOT_DIR="$(dirname ${DIR})" - -# get config options - -IMAGE_NAME="phabricator-proxy" - -docker build -t ${IMAGE_NAME} . -read -p "Push to registry? [yN]" -n 1 -r -echo -if [[ $REPLY =~ ^[Yy]$ ]] -then - source "${ROOT_DIR}/k8s_config" - QUALIFIED_NAME="${GCR_HOSTNAME}/${GCP_PROJECT}/${IMAGE_NAME}" - docker tag ${IMAGE_NAME} ${QUALIFIED_NAME} - docker push ${QUALIFIED_NAME} -fi diff --git a/phabricator-proxy/main.py b/phabricator-proxy/main.py deleted file mode 100644 index afdaeb3..0000000 --- a/phabricator-proxy/main.py +++ /dev/null @@ -1,72 +0,0 @@ -from cmath import log -from flask.logging import default_handler -from urllib.parse import urlparse, parse_qs -import flask -import json -import logging -import logging.handlers -import os -import requests - - -buildkite_api_token = os.getenv("BUILDKITE_API_TOKEN", "") - -app = flask.Flask(__name__) -app.config["DEBUG"] = False -formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s') -errHandler = logging.FileHandler('error.log', encoding='utf-8',) -errHandler.setLevel(logging.ERROR) -errHandler.setFormatter(formatter) -app.logger.addHandler(errHandler) -rotatingHandler = logging.handlers.TimedRotatingFileHandler('info.log', when='D', encoding='utf-8', backupCount=8) -rotatingHandler.setFormatter(formatter) -app.logger.addHandler(rotatingHandler) -app.logger.setLevel(logging.INFO) -stdoutLog = logging.StreamHandler() -stdoutLog.setFormatter(formatter) -app.logger.addHandler(stdoutLog) -app.logger.removeHandler(default_handler) - -@app.route('/', methods=['GET']) -def home(): - return "Hi LLVM!" - - -@app.route('/build', methods=['POST', 'GET']) -def build(): - app.logger.info('request: %s %s', flask.request, flask.request.url) - app.logger.info('headers: %s', flask.request.headers) - if flask.request.method == 'POST': - app.logger.info('data: %s', flask.request.data) - app.logger.info('form: %s', flask.request.form) - url = urlparse(flask.request.url) - params = parse_qs(url.query) - build_env = {} - for k, v in params.items(): - if len(v) == 1: - build_env['ph_' + k] = v[0] - refspec = 'main' - if 'ph_scripts_refspec' in build_env: - refspec = build_env['ph_scripts_refspec'] - build_request = { - 'commit': 'HEAD', - 'branch': refspec, - 'env': build_env, - 'message': f'D{build_env["ph_buildable_revision"]}', - } - app.logger.info('buildkite request: %s', build_request) - headers = {'Authorization': f'Bearer {buildkite_api_token}'} - response = requests.post( - 'https://api.buildkite.com/v2/organizations/llvm-project' - '/pipelines/diff-checks/builds', - json=build_request, - headers=headers) - app.logger.info('buildkite response: %s %s', response.status_code, response.text) - rjs = json.loads(response.text) - return rjs['web_url'] - else: - return "expected POST request" - - -if __name__ == '__main__': - app.run(host='0.0.0.0:8080') diff --git a/scripts/metrics/jenkins.py b/scripts/metrics/jenkins.py deleted file mode 100644 index 1b1f1af..0000000 --- a/scripts/metrics/jenkins.py +++ /dev/null @@ -1,196 +0,0 @@ -#!/usr/bin/env python3 - -import csv -import datetime -import hashlib -import json -import numpy -import requests -import os -import re -import sys -from typing import Optional, List, Dict -from urllib.parse import urljoin - - -class Stage: - - def __init__(self, stage_dict: Dict): - self.name = stage_dict['name'] - self.success = stage_dict['status'].lower() == 'success' - self.start_time = datetime.datetime.fromtimestamp(stage_dict['startTimeMillis']/1000) - self.duration = datetime.timedelta(milliseconds=stage_dict['durationMillis']) - - -class Build: - - def __init__(self, job_name: str, build_dict: Dict): - self.job_name = job_name - self.number = build_dict['number'] - self.result = build_dict['result'] - self.start_time = datetime.datetime.fromtimestamp(build_dict['timestamp']/1000) - self.duration = datetime.timedelta(milliseconds=build_dict['duration']) - self.stages = [] # type: List[Stage] - self.agent = None # type: Optional[str] - - @property - def hour(self) -> datetime.datetime: - return datetime.datetime( - year=self.start_time.year, - month=self.start_time.month, - day=self.start_time.day, - hour=self.start_time.hour, - ) - - @property - def day(self) -> datetime.datetime: - return datetime.datetime( - year=self.start_time.year, - month=self.start_time.month, - day=self.start_time.day, - ) - - @property - def success(self): - if self.result is None: - return False - return self.result.lower() == 'success' - - def update_from_wfdata(self, wfdata: Dict): - self.stages = [Stage(s) for s in wfdata['stages']] - - -class JenkinsStatsReader: - _TMP_DIR = 'tmp/jenkins' - - def __init__(self): - self.username = None # type: Optional[str] - self.password = None # type: Optional[str] - self.jenkins_url = None # type: Optional[str] - self.jobs = [] # type: List[str] - self.builds = {} # type: Dict[str, List[Build]] - self._read_config() - self._session = requests.session() - self._session.auth = (self.username, self.password) - - def _read_config(self, credential_path='~/.llvm-premerge-checks/jenkins-creds.json'): - with open(os.path.expanduser(credential_path)) as credential_file: - config = json.load(credential_file) - self.username = config['username'] - self.password = config['password'] - self.jenkins_url = config['jenkins_url'] - - @property - def job_names(self) -> List[str]: - return self.builds.keys() - - def get_data(self): - jobnames = self.fetch_jobsnames() - print('Found {} jobs: {}'.format(len(jobnames), jobnames)) - self.get_builds(jobnames) - # self.get_workflow_data() - self.get_build_agents() - self.create_statistics('hour') - self.create_statistics('day') - self.write_all_builds() - - def cached_get(self, url, as_json: bool = True): - m = hashlib.sha256() - m.update(url.encode('utf-8')) - filename = m.digest().hex() - cache_file = os.path.join(self._TMP_DIR, filename) - if os.path.isfile(cache_file): - with open(cache_file, 'r') as json_file: - if as_json: - return json.load(json_file) - return json_file.read() - - response = self._session.get(urljoin(self.jenkins_url, url)) - if response.status_code != 200: - if response.status_code == 404: - return None - raise IOError('Could not read data from {}:\n{}'.format(url, response.text)) - os.makedirs(self._TMP_DIR, exist_ok=True) - with open(cache_file, 'w') as jenkins_data_file: - jenkins_data_file.write(response.text) - if as_json: - return response.json() - return response.text - - def fetch_jobsnames(self) -> List[str]: - data = self.cached_get('api/json?tree=jobs[name]') - return [job['name'] for job in data['jobs']] - - def get_builds(self, job_names): - for job_name in job_names: - print('Gettings builds for: {}'.format(job_name)) - build_data = self.cached_get('job/{}/api/json?tree=allBuilds[number,result,duration,timestamp,executor]'.format(job_name)) - self.builds[job_name] = [Build(job_name, b) for b in build_data['allBuilds']] - print('{} has {} builds'.format(job_name, len(self.builds[job_name]))) - - def get_workflow_data(self): - print('Getting workflow data...') - for job_name, builds in self.builds.items(): - for i, build in enumerate(builds): - wfdata = self.cached_get('job/{}/{}/wfapi/'.format(job_name, build.number)) - build.update_from_wfdata(wfdata) - sys.stdout.write('\r{} [{}/{}]'.format(job_name, i, len(builds))) - sys.stdout.flush() - - def get_build_agents(self): - print('Getting agent names...') - for job_name, builds in self.builds.items(): - for i, build in enumerate(builds): - console_log = self.cached_get('job/{}/{}/consoleText'.format(job_name, build.number), as_json=False) - if console_log is None: - continue - match = re.search(r'Running on ([\w-]+) in', console_log) - if match: - build.agent = match.group(1) - sys.stdout.write('\r{} [{}/{}]'.format(job_name, i, len(builds))) - sys.stdout.flush() - - def create_statistics(self, group_by: str): - for job_name, builds in self.builds.items(): - print('Writing data for {}'.format(job_name)) - # TODO: add success/failure rates - fieldnames = ['date', '# builds', 'median duration', 'p90 duration', 'p95 duration', 'max duration'] - csv_file = open('tmp/jenkins_{}_{}.csv'.format(job_name, group_by), 'w') - writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel) - writer.writeheader() - build_hist = {} - for build in builds: - build_hist.setdefault(getattr(build, group_by), []).append(build) - - for key in sorted(build_hist.keys()): - builds = build_hist[key] # type: List[Build] - durations = numpy.array([b.duration.seconds for b in builds]) - writer.writerow({ - 'date': key, - '# builds': len(builds), - 'median duration': numpy.median(durations)/60, - 'p90 duration': numpy.percentile(durations, 90)/60, - 'p95 duration': numpy.percentile(durations, 95)/60, - 'max duration': numpy.max(durations)/60, - }) - - def write_all_builds(self): - fieldnames = ['date', 'job_name', 'build_number', 'duration', 'agent', 'success'] - csv_file = open('tmp/jenkins_all_builds.csv', 'w') - writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel) - writer.writeheader() - for job_name, builds in self.builds.items(): - for build in builds: - writer.writerow({ - 'date': build.start_time, - 'job_name': job_name, - 'build_number': build.number, - 'duration': build.duration.total_seconds()/60.0, - 'agent': build.agent, - 'success': build.success, - }) - - -if __name__ == '__main__': - jsr = JenkinsStatsReader() - jsr.get_data() diff --git a/scripts/metrics/load_buildkite.py b/scripts/metrics/load_buildkite.py index 930d1ec..c344b02 100644 --- a/scripts/metrics/load_buildkite.py +++ b/scripts/metrics/load_buildkite.py @@ -77,7 +77,7 @@ def download_job_artifacts(conn): c.execute(f""" select ja.meta from (select j.key,j.id job_id, a->>'id' aid, a as meta from jobs j, json_array_elements(j.meta->'artifacts') as a) as ja -left join artifacts a on a.job_id = ja.job_id and a.id=ja.aid +left join artifacts a on a.job_id = ja.job_id and a.id=ja.aid where a.id IS NULL""") total = c.rowcount logging.info(f'will download {total} artifacts') @@ -207,7 +207,7 @@ def download_job_artifacts_list(conn): logging.info('download jobs artifact lsits') with conn.cursor() as c: c.execute(""" -SELECT key, raw->>'artifacts_url', meta +SELECT key, raw->>'artifacts_url', meta FROM jobs WHERE (meta->>'artifacts' IS NULL) AND (raw->>'artifacts_url' IS NOT NULL)""") cnt = 0 @@ -283,10 +283,10 @@ if __name__ == '__main__': logging.basicConfig(level='INFO', format='%(levelname)-7s %(message)s') cn = connect() logging.info('downloading buildkite data') - #insert_all_builds(cn) - insert_new_builds(cn) - update_running_builds(cn) - insert_new_jobs(cn) - download_job_artifacts_list(cn) - download_job_artifacts(cn) - download_job_logs(cn) + # insert_all_builds(cn) + # insert_new_builds(cn) + # update_running_builds(cn) + # insert_new_jobs(cn) + # download_job_artifacts_list(cn) + # download_job_artifacts(cn) + # download_job_logs(cn) diff --git a/terraform/.gitignore b/terraform/.gitignore deleted file mode 100644 index 1e0a92a..0000000 --- a/terraform/.gitignore +++ /dev/null @@ -1,37 +0,0 @@ -# Local .terraform directories -**/.terraform/* - -# .tflock files -.terraform.lock.hcl - -# .tfstate files -*.tfstate -*.tfstate.* - -# Crash log files -crash.log -crash.*.log - -# Exclude all .tfvars files, which are likely to contain sensitive data, such as -# password, private keys, and other secrets. These should not be part of version -# control as they are data points which are potentially sensitive and subject -# to change depending on the environment. -*.tfvars -*.tfvars.json - -# Ignore override files as they are usually used to override resources locally and so -# are not checked in -override.tf -override.tf.json -*_override.tf -*_override.tf.json - -# Include override files you do wish to add to version control using negated pattern -# !example_override.tf - -# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan -# example: *tfplan* - -# Ignore CLI configuration files -.terraformrc -terraform.rc diff --git a/terraform/README.md b/terraform/README.md deleted file mode 100644 index 84d94c3..0000000 --- a/terraform/README.md +++ /dev/null @@ -1,94 +0,0 @@ -# Permissions -TODO - -# Step 1: Bootstrap - -Copy `variables.tfvars` from `variables.tfvars_example` -Replace the placeholders for `project-id` and `billing-account` in `variables.tfvars` -Insert secret values in the `variables.tfvars` file or insert values on runtime when using terraform plan or apply - -### Initialise terraform with local backend -Comment out everything in `backend.tf` file to use local state for the first run as the bucket for storing the state is not created. -``` -terraform -chdir=terraform init -``` - -### Create the state bucket -``` -terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_storage_bucket.terraform_state" -``` -To disable the conformation use `--auto-aprove` flag - -## Migrate the state to the bucket. -Uncomment everything in `backend.tf` file to use remote state with newly created bucket. -``` -export PROJECT_ID="" -``` -``` -terraform -chdir=terraform init -backend-config="bucket=terraform-state-${PROJECT_ID}" -backend-config="prefix=terraform/state" -``` -## Create the secrets. -``` -terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_secret_manager_secret.secret" -``` - -## Create the cluster. -Due to the problem described [here](https://github.com/hashicorp/terraform-provider-kubernetes/issues/1775) terraform kubernetes provider requires kubernetes cluster to be created first. So to create the cluster without applying kubernetes resources we will do the apply in 2 runs using the `-target` flag. -``` -terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_container_cluster.llvm_premerge_checks_cluster" -``` - -## Build the builders -To deploy build workers you need the worker docker images in your project. -TODO cloud build SA permissions - -### Linux worker image -Execute cloud build to build Linux worker: -``` -gcloud builds submit --config=containers/buildkite-premerge-debian/cloudbuild.yaml containers/buildkite-premerge-debian/ --project=${PROJECT_ID} -``` - -### Windows worker image -Build windows cloud builder. Follow the steps described here: [link](https://github.com/GoogleCloudPlatform/cloud-builders-community/tree/master/windows-builder) - -Execute cloud build to build Windows worker: -``` -gcloud builds submit --config=containers/buildkite-premerge-windows/cloudbuild.yaml containers/buildkite-premerge-windows/ --project=${PROJECT_ID} -``` - -## Create the rest of the gcp resources including workers in kubernetes pods -``` -terraform -chdir=terraform apply -var-file="variables.tfvars" -``` - -## Terraform cloud build automation -Manual trigger -``` -gcloud builds submit --config=terraform/cloudbuild.yaml terraform --project=${PROJECT_ID} --substitutions=_GIT_REPO=${GIT_REPO} -``` - -Automatic trigger: -``` - -``` - -# Budget -Budget alerts set on the monthly basis. Notification emails will be triggered on 50%, 90% and 100% of you budget. - -## Actions to reduce project costs -Adjust the GKE cluster nodes: Set the node count to 1 per built platform (Linux and Windows). The node count can be controlled from the `variables.tfvars` file using the `linux-agents-count` and `windows-agents-count` parameters. If these parameters are not set in the `variables.tfvars` file, you can check the default parameters configured in the `variables.tf` file. - -## Break glass procedure -Regarding the break glass procedure for emergency configuration of the Kubernetes build nodes, you can use the following set of gcloud commands. It is recommended to execute them from the [Cloud Shell](https://cloud.google.com/shell/docs/using-cloud-shell) for simplicity. Please note that these commands assume you have the necessary permissions and authentication to access and modify the GKE cluster. -``` -export PROJECT_ID= -export ZONE="europe-west3-c" - -gcloud container clusters get-credentials ${PROJECT_ID}-cluster --zone ${ZONE} --project ${PROJECT_ID} -#gcloud container clusters update llvm-premerge-checks-cluster --node-pool linux-agents --zone ${ZONE} --project ${PROJECT_ID} --no-enable-autoscaling -gcloud container clusters resize llvm-premerge-checks-cluster --node-pool linux-agents --num-nodes 1 --zone ${ZONE} --project ${PROJECT_ID} -#gcloud container clusters update llvm-premerge-checks-cluster --node-pool windows-agents --zone ${ZONE} --project ${PROJECT_ID} --no-enable-autoscaling -gcloud container clusters resize llvm-premerge-checks-cluster --node-pool windows-agents --num-nodes 1 --zone ${ZONE} --project ${PROJECT_ID} -``` - -These commands will scale down the deployments to have a single replica for both the Linux and Windows agents. Adjust the replica count as needed. Please note that making changes to your GKE cluster configuration or scaling down nodes may impact the availability and performance of the pipeline. \ No newline at end of file diff --git a/terraform/backend.tf b/terraform/backend.tf deleted file mode 100644 index 27ff2c1..0000000 --- a/terraform/backend.tf +++ /dev/null @@ -1,4 +0,0 @@ -terraform { - backend "gcs" { - } -} \ No newline at end of file diff --git a/terraform/billing.tf b/terraform/billing.tf deleted file mode 100644 index a3a7b8c..0000000 --- a/terraform/billing.tf +++ /dev/null @@ -1,49 +0,0 @@ -#todo fix billing alert creation -#todo do not create billing if option in variables is off - -resource "google_billing_budget" "budget" { - billing_account = data.google_project.current_project.billing_account - display_name = "budget" - amount { - specified_amount { - currency_code = "USD" - units = var.billing-budget - } - } - - - budget_filter { - projects = ["projects/${data.google_project.current_project.number}"] - credit_types_treatment = "EXCLUDE_ALL_CREDITS" - calendar_period = "MONTH" - #services = ["services/24E6-581D-38E5"] # Bigquery - } - - threshold_rules { - threshold_percent = 0.5 - } - threshold_rules { - threshold_percent = 0.9 - } - threshold_rules { - threshold_percent = 1.0 - } - - #TODO add if not empty billing admins var. Else use default admins - all_updates_rule { - monitoring_notification_channels = [ - for k, v in google_monitoring_notification_channel.notification_channel : google_monitoring_notification_channel.notification_channel[k].id - ] - disable_default_iam_recipients = length(var.billing-admins) < 1 ? false : true - } -} - -resource "google_monitoring_notification_channel" "notification_channel" { - for_each = var.billing-admins - display_name = each.key - type = "email" - - labels = { - email_address = each.value - } -} \ No newline at end of file diff --git a/terraform/cloudbuild.yaml b/terraform/cloudbuild.yaml deleted file mode 100644 index 0a4b3b0..0000000 --- a/terraform/cloudbuild.yaml +++ /dev/null @@ -1,57 +0,0 @@ -steps: - - name: gcr.io/cloud-builders/git - args: - - '-c' - - 'git clone ${_GIT_REPO} repo --depth 1' - entrypoint: bash - - name: hashicorp/terraform - args: - - init - - '-backend-config=bucket=${_TF_BACKEND_BUCKET}' - - '-backend-config=prefix=${_TF_BACKEND_PREFIX}' - dir: repo/terraform - - name: hashicorp/terraform - args: - - plan - - '-var=project-id=${PROJECT_ID}' - - '-var=buildkite-api-token-readonly=$$BUILDKITE_API_TOKEN_READONLY' - - '-var=buildkite-agent-token=$$BUILDKITE_AGENT_TOKEN' - - '-var=conduit-api-token=$$CONDUIT_API_TOKEN' - - '-var=git-id-rsa=$$GIT_ID_RSA' - - '-var=id-rsa-pub=$$ID_RSA_PUB' - - '-var=git-known-hosts=$$GIT_KNOWN_HOSTS' - - '-out=/workspace/tfplan-${BUILD_ID}' - secretEnv: - - 'BUILDKITE_API_TOKEN_READONLY' - - 'BUILDKITE_AGENT_TOKEN' - - 'CONDUIT_API_TOKEN' - - 'GIT_ID_RSA' - - 'ID_RSA_PUB' - - 'GIT_KNOWN_HOSTS' - dir: repo/terraform -# - name: hashicorp/terraform -# args: -# - apply -# - '-auto-approve' -# - /workspace/tfplan-${BUILD_ID} -# dir: repo/terraform -substitutions: - _GIT_REPO: $(body.project.git_http_url) - _TF_BACKEND_BUCKET: 'terraform-state-${PROJECT_ID}' - _TF_BACKEND_PREFIX: terraform/state -availableSecrets: - secretManager: - - versionName: 'projects/${PROJECT_ID}/secrets/buildkite-api-token-readonly/versions/latest' - env: 'BUILDKITE_API_TOKEN_READONLY' - - versionName: 'projects/${PROJECT_ID}/secrets/buildkite-agent-token/versions/latest' - env: 'BUILDKITE_AGENT_TOKEN' - - versionName: 'projects/${PROJECT_ID}/secrets/conduit-api-token/versions/latest' - env: 'CONDUIT_API_TOKEN' - - versionName: 'projects/${PROJECT_ID}/secrets/git-id-rsa/versions/latest' - env: 'GIT_ID_RSA' - - versionName: 'projects/${PROJECT_ID}/secrets/id-rsa-pub/versions/latest' - env: 'ID_RSA_PUB' - - versionName: 'projects/${PROJECT_ID}/secrets/git-known-hosts/versions/latest' - env: 'GIT_KNOWN_HOSTS' -options: - dynamic_substitutions: true \ No newline at end of file diff --git a/terraform/cluster.tf b/terraform/cluster.tf deleted file mode 100644 index 47eea3d..0000000 --- a/terraform/cluster.tf +++ /dev/null @@ -1,179 +0,0 @@ -resource "google_service_account" "llvm_premerge_checks_sa" { - account_id = "llvm-premerge-checks-sa" - display_name = "Service Account used with the gke cluster" -} - -resource "google_project_iam_binding" "sa_gcr_reader_role" { - project = var.project-id - role = "roles/storage.objectViewer" - - members = [ - "serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}" - ] -} - -resource "google_project_iam_binding" "sa_mertics_writer_role" { - project = var.project-id - role = "roles/monitoring.metricWriter" - - members = [ - "serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}" - ] -} - -resource "google_project_iam_binding" "sa_logging_writer_role" { - project = var.project-id - role = "roles/logging.logWriter" - - members = [ - "serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}" - ] -} - -resource "google_container_cluster" "llvm_premerge_checks_cluster" { - name = "llvm-premerge-checks-cluster" - - location = var.zone - - network = google_compute_network.vpc_network.name - subnetwork = google_compute_subnetwork.vpc_subnetwork.name - - #enable_autopilot = true - initial_node_count = 1 - - #TODO: redo - # master_authorized_networks_config { - # cidr_blocks { - # cidr_block= "0.0.0.0/0" - # display_name = "everyone" - # } - # } - - private_cluster_config { - enable_private_nodes = true - enable_private_endpoint = false - master_ipv4_cidr_block = var.master-cidr #todo: var - } - ip_allocation_policy { - cluster_secondary_range_name = "pods" - services_secondary_range_name = "services" - } - depends_on = [google_project_service.google_api] -} - -resource "google_container_node_pool" "linux_agents_nodepool" { - name = "linux-agents" - cluster = google_container_cluster.llvm_premerge_checks_cluster.id - - node_config { - machine_type = var.linux-agents-machine-type - image_type = "cos_containerd" - disk_size_gb = 500 - disk_type = "pd-ssd" - - #todo: assign right permissions and use custom service account - service_account = google_service_account.llvm_premerge_checks_sa.email - oauth_scopes = [ - "https://www.googleapis.com/auth/cloud-platform" - ] - } - - autoscaling { - min_node_count = 0 - max_node_count = var.linux-agents-count - location_policy = "BALANCED" - } -} - -resource "google_container_node_pool" "windows_agents_nodepool" { - name = "windows-agents" - cluster = google_container_cluster.llvm_premerge_checks_cluster.id - - node_config { - machine_type = var.windows-agents-machine-type - image_type = "windows_ltsc_containerd" # todo ltsc or sac ? - disk_size_gb = 500 - disk_type = "pd-ssd" - - #todo: assign right permissions and use custom service account - service_account = google_service_account.llvm_premerge_checks_sa.email - oauth_scopes = [ - "https://www.googleapis.com/auth/cloud-platform" - ] - } - - autoscaling { - min_node_count = 1 - max_node_count = var.windows-agents-count - location_policy = "BALANCED" - } -} - -#todo recheck -data "google_client_config" "provider" {} - -provider "kubernetes" { - host = "https://${google_container_cluster.llvm_premerge_checks_cluster.endpoint}" - token = data.google_client_config.provider.access_token - cluster_ca_certificate = base64decode( - google_container_cluster.llvm_premerge_checks_cluster.master_auth[0].cluster_ca_certificate, - ) -} - -resource "kubernetes_manifest" "buildkite_namespace" { - manifest = yamldecode(templatefile("kubernetes/namespace.yaml", {})) -} - -resource "kubernetes_manifest" "buildkite_agent_token_secret" { - manifest = yamldecode(templatefile("kubernetes/secret-buildkite-token.yaml", { buildkite-agent-token = var.buildkite-agent-token })) - depends_on = [kubernetes_manifest.buildkite_namespace] -} - -resource "kubernetes_manifest" "buildkite_api_token_readonly_secret" { - manifest = yamldecode(templatefile("kubernetes/secret-buildkite-token-readonly.yaml", { buildkite-api-token-readonly = var.buildkite-api-token-readonly })) - depends_on = [kubernetes_manifest.buildkite_namespace] -} - -resource "kubernetes_manifest" "buildkite_github_secret" { - manifest = yamldecode(templatefile("kubernetes/secret-github-ssh.yaml", { git-id-rsa = var.git-id-rsa, id-rsa-pub = var.id-rsa-pub, git-known-hosts = var.git-known-hosts })) - depends_on = [kubernetes_manifest.buildkite_namespace] -} - -resource "kubernetes_manifest" "buildkite_conduit_api_token_secret" { - manifest = yamldecode(templatefile("kubernetes/secret-conduit-token.yaml", { conduit-api-token = var.conduit-api-token })) - depends_on = [kubernetes_manifest.buildkite_namespace] -} - -resource "kubernetes_manifest" "buildkite_linux_agent" { - manifest = yamldecode(templatefile("kubernetes/linux-agents.yaml", { - project-id = var.project-id, - gke-nodepool = google_container_node_pool.linux_agents_nodepool.name, - build-queue = var.linux-agents-build-queue, - cpu-request = var.linux-agents-cpu-request, - mem-request = var.linux-agents-mem-request, - replicas-count = var.linux-agents-count, - })) - depends_on = [kubernetes_manifest.buildkite_namespace] - # wait { - # fields = { - # "status.phase" = "Running" - # } - # } -} - -resource "kubernetes_manifest" "buildkite_windows_agent" { - manifest = yamldecode(templatefile("kubernetes/windows-agents.yaml", { - project-id = var.project-id, - gke-nodepool = google_container_node_pool.windows_agents_nodepool.name, - build-queue = var.windows-agents-build-queue, - cpu-request = var.windows-agents-cpu-request, - mem-request = var.windows-agents-mem-request, - replicas-count = var.windows-agents-count, - })) - depends_on = [kubernetes_manifest.buildkite_namespace] - # wait { - # fields = { - # "status.phase" = "Running" - # } - # } -} diff --git a/terraform/kubernetes/linux-agents.yaml b/terraform/kubernetes/linux-agents.yaml deleted file mode 100644 index 0e2cfb0..0000000 --- a/terraform/kubernetes/linux-agents.yaml +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://llvm.org/LICENSE.txt -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: linux-agents - namespace: buildkite -spec: - replicas: ${replicas-count} - strategy: - rollingUpdate: - maxSurge: 25% - maxUnavailable: 50% - type: RollingUpdate - selector: - matchLabels: - app: agent-premerge-debian - template: - metadata: - labels: - app: agent-premerge-debian - spec: - containers: - - name: buildkite-premerge-debian - image: gcr.io/${project-id}/buildkite-premerge-debian:latest - resources: - limits: - cpu: ${cpu-request} - memory: ${mem-request} - requests: - cpu: ${cpu-request} - memory: ${mem-request} - volumeMounts: - - name: github-ssh - mountPath: /mnt/ssh - - name: workdir - mountPath: /var/lib/buildkite-agent - env: - - name: BUILDKITE_AGENT_TOKEN - valueFrom: - secretKeyRef: - name: buildkite-agent-token - key: token - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: BUILDKITE_AGENT_TAGS - value: "queue=${build-queue},name=$(POD_NAME),project=${project-id}" - - name: BUILDKITE_BUILD_PATH - value: "/var/lib/buildkite-agent/builds" - - name: CONDUIT_TOKEN - valueFrom: - secretKeyRef: - name: conduit-api-token - key: token - - name: BUILDKITE_API_TOKEN - valueFrom: - secretKeyRef: - name: buildkite-api-token-readonly - key: token - volumes: - - name: github-ssh - secret: - secretName: github-ssh - - name: workdir - emptyDir: {} - nodeSelector: - cloud.google.com/gke-nodepool: ${gke-nodepool} - terminationGracePeriodSeconds: 60 diff --git a/terraform/kubernetes/namespace.yaml b/terraform/kubernetes/namespace.yaml deleted file mode 100644 index 70c9001..0000000 --- a/terraform/kubernetes/namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: buildkite \ No newline at end of file diff --git a/terraform/kubernetes/secret-buildkite-token-readonly.yaml b/terraform/kubernetes/secret-buildkite-token-readonly.yaml deleted file mode 100644 index ab1b4f4..0000000 --- a/terraform/kubernetes/secret-buildkite-token-readonly.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: buildkite-api-token-readonly - namespace: buildkite -type: Opaque -data: - token: ${buildkite-api-token-readonly} \ No newline at end of file diff --git a/terraform/kubernetes/secret-buildkite-token.yaml b/terraform/kubernetes/secret-buildkite-token.yaml deleted file mode 100644 index 04182f2..0000000 --- a/terraform/kubernetes/secret-buildkite-token.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: buildkite-agent-token - namespace: buildkite -type: Opaque -data: - token: ${buildkite-agent-token} \ No newline at end of file diff --git a/terraform/kubernetes/secret-conduit-token.yaml b/terraform/kubernetes/secret-conduit-token.yaml deleted file mode 100644 index b2be1ae..0000000 --- a/terraform/kubernetes/secret-conduit-token.yaml +++ /dev/null @@ -1,8 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: conduit-api-token - namespace: buildkite -type: Opaque -data: - token: ${conduit-api-token} \ No newline at end of file diff --git a/terraform/kubernetes/secret-github-ssh.yaml b/terraform/kubernetes/secret-github-ssh.yaml deleted file mode 100644 index 0067ec2..0000000 --- a/terraform/kubernetes/secret-github-ssh.yaml +++ /dev/null @@ -1,10 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: github-ssh - namespace: buildkite -type: Opaque -data: - id_rsa: ${git-id-rsa} - id_rsa.pub: ${id-rsa-pub} - known_hosts: ${git-known-hosts} \ No newline at end of file diff --git a/terraform/kubernetes/windows-agents.yaml b/terraform/kubernetes/windows-agents.yaml deleted file mode 100644 index 35e710b..0000000 --- a/terraform/kubernetes/windows-agents.yaml +++ /dev/null @@ -1,79 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the the Apache License v2.0 with LLVM Exceptions (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://llvm.org/LICENSE.txt -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apiVersion: apps/v1 -kind: Deployment -metadata: - name: windows-agents - namespace: buildkite -spec: - replicas: ${replicas-count} - strategy: - type: Recreate - selector: - matchLabels: - app: agent-premerge-windows - template: - metadata: - labels: - app: agent-premerge-windows - spec: - containers: - - name: buildkite-premerge-windows - image: gcr.io/${project-id}/buildkite-premerge-windows:latest - resources: - limits: - cpu: ${cpu-request} - memory: ${mem-request} - requests: - cpu: ${cpu-request} - memory: ${mem-request} - # volumeMounts: - # - name: workdir - # mountPath: "C:\\ws" - env: - - name: buildkiteAgentToken - valueFrom: - secretKeyRef: - name: buildkite-agent-token - key: token - - name: POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: BUILDKITE_AGENT_TAGS - value: "queue=${build-queue},name=$(POD_NAME),project=${project-id}" - # - name: BUILDKITE_BUILD_PATH - # value: "C:\\ws" - - name: CONDUIT_TOKEN - valueFrom: - secretKeyRef: - name: conduit-api-token - key: token - - name: BUILDKITE_API_TOKEN - valueFrom: - secretKeyRef: - name: buildkite-api-token-readonly - key: token - # volumes: - # - name: workdir - # emptyDir: {} - nodeSelector: - cloud.google.com/gke-nodepool: ${gke-nodepool} - tolerations: - - key: "node.kubernetes.io/os" - operator: "Equal" - value: "windows" - effect: "NoSchedule" - terminationGracePeriodSeconds: 60 diff --git a/terraform/main.tf b/terraform/main.tf deleted file mode 100644 index 4c743ea..0000000 --- a/terraform/main.tf +++ /dev/null @@ -1,92 +0,0 @@ -#todo automatically rebuild buildkite images - -data "google_project" "current_project" { - project_id = var.project-id -} - -locals { - cloud_build_sa_roles = ["roles/editor", "roles/storage.objectAdmin", "roles/secretmanager.secretAccessor","roles/secretmanager.viewer","roles/resourcemanager.projectIamAdmin"] - enabled_apis = [ - "secretmanager.googleapis.com", - "billingbudgets.googleapis.com", - "cloudbuild.googleapis.com", - "compute.googleapis.com", - "container.googleapis.com", - "cloudresourcemanager.googleapis.com", - "cloudbilling.googleapis.com" - ] -} - -#todo create separate sa for cloud build -# data "google_iam_policy" "cloud_build_sa" { -# binding { -# role = "roles/iam.serviceAccountUser" - -# members = [ -# "serviceAccount:${data.google_project.current_project.number}-compute@developer.gserviceaccount.com", -# ] -# } -# } - -# resource "google_service_account_iam_policy" "admin-account-iam" { -# service_account_id = "${data.google_project.current_project.id}/serviceAccounts/${data.google_project.current_project.number}@cloudbuild.gserviceaccount.com" -# policy_data = data.google_iam_policy.cloud_build_sa.policy_data -# } - -resource "google_project_iam_member" "cloudbuild_sa_roles" { - project = var.project-id - for_each = toset(local.cloud_build_sa_roles) - role = each.value - - member = "serviceAccount:${data.google_project.current_project.number}@cloudbuild.gserviceaccount.com" -} - -resource "google_project_service" "google_api" { - for_each = toset(local.enabled_apis) - service = each.value -} - -resource "google_storage_bucket" "terraform_state" { - name = "terraform-state-${var.project-id}" - uniform_bucket_level_access = true - location = "EU" - depends_on = [google_project_service.google_api] -} - -resource "google_compute_network" "vpc_network" { - name = "vpc-network" - auto_create_subnetworks = false -} - -resource "google_compute_subnetwork" "vpc_subnetwork" { - name = "subnetwork" - ip_cidr_range = var.subnetwork-main-cidr - region = var.region - network = google_compute_network.vpc_network.id - secondary_ip_range { - range_name = "pods" - ip_cidr_range = var.subnetwork-pods-cidr - } - secondary_ip_range { - range_name = "services" - ip_cidr_range = var.subnetwork-services-cidr - } -} - -resource "google_compute_router" "router" { - name = "router" - region = google_compute_subnetwork.vpc_subnetwork.region - network = google_compute_network.vpc_network.id - - bgp { - asn = 64514 #todo recheck - } -} - -resource "google_compute_router_nat" "nat" { - name = "router-nat" - router = google_compute_router.router.name - region = google_compute_router.router.region - nat_ip_allocate_option = "AUTO_ONLY" - source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES" -} diff --git a/terraform/provider.tf b/terraform/provider.tf deleted file mode 100644 index 1a55510..0000000 --- a/terraform/provider.tf +++ /dev/null @@ -1,7 +0,0 @@ -provider "google" { - project = var.project-id - region = var.region - zone = var.zone - billing_project = var.project-id - user_project_override = true -} \ No newline at end of file diff --git a/terraform/secrets.tf b/terraform/secrets.tf deleted file mode 100644 index 9a7aa68..0000000 --- a/terraform/secrets.tf +++ /dev/null @@ -1,26 +0,0 @@ -locals { - secrets = { - "buildkite-api-token-readonly": var.buildkite-api-token-readonly, - "buildkite-agent-token": var.buildkite-agent-token, - "conduit-api-token": var.conduit-api-token, - "git-id-rsa": var.git-id-rsa, - "id-rsa-pub": var.id-rsa-pub, - "git-known-hosts": var.git-known-hosts - } -} - -resource "google_secret_manager_secret" "secret" { - for_each = local.secrets - secret_id = each.key - - replication { - automatic = true - } - depends_on = [google_project_service.google_api] -} - -resource "google_secret_manager_secret_version" "secret_version" { - for_each = local.secrets - secret = google_secret_manager_secret.secret[each.key].id - secret_data = each.value -} \ No newline at end of file diff --git a/terraform/variables.tf b/terraform/variables.tf deleted file mode 100644 index 25d5a10..0000000 --- a/terraform/variables.tf +++ /dev/null @@ -1,117 +0,0 @@ -variable "project-id" { - type = string -} - -variable "billing-budget" { - type = number - default = "25000" -} - -variable "billing-admins" { - type = map(any) - default = {} -} - -variable "region" { - type = string - default = "europe-west3" -} - -variable "zone" { - type = string - default = "europe-west3-c" -} - -variable "subnetwork-main-cidr" { - type = string - default = "10.0.0.0/16" -} - -variable "master-cidr" { - type = string - default = "10.1.0.0/28" -} - -variable "subnetwork-pods-cidr" { - type = string - default = "10.2.0.0/16" -} - -variable "subnetwork-services-cidr" { - type = string - default = "10.3.0.0/16" -} - -variable "linux-agents-machine-type" { - type = string - default = "e2-standard-32" -} - -variable "linux-agents-count" { - type = number - default = 6 -} - -variable "linux-agents-build-queue" { - type = string - default = "linux" -} - -variable "linux-agents-cpu-request" { - type = string - default = "30" -} - -variable "linux-agents-mem-request" { - type = string - default = "80Gi" -} - -variable "windows-agents-machine-type" { - type = string - default = "c2-standard-16" -} - -variable "windows-agents-count" { - type = number - default = 8 -} - -variable "windows-agents-build-queue" { - type = string - default = "windows" -} - -variable "windows-agents-cpu-request" { - type = string - default = "15" -} - -variable "windows-agents-mem-request" { - type = string - default = "60Gi" -} - -variable "buildkite-api-token-readonly" { - type = string -} - -variable "buildkite-agent-token" { - type = string -} - -variable "conduit-api-token" { - type = string -} - -variable "git-id-rsa" { - type = string -} - -variable "id-rsa-pub" { - type = string -} - -variable "git-known-hosts" { - type = string -} diff --git a/terraform/variables.tfvars_example b/terraform/variables.tfvars_example deleted file mode 100644 index d8985cd..0000000 --- a/terraform/variables.tfvars_example +++ /dev/null @@ -1,21 +0,0 @@ -project-id = "" -#billing-admins = {"test": "test@test.com"} - -#linux-agents-machine-type = "e2-standard-8" -#linux-agents-count = 1 -#linux-agents-build-queue = "linux-test" -#linux-agents-cpu-request = "6" -#linux-agents-mem-request = "8Gi" - -#windows-agents-machine-type = "e2-standard-8" -#windows-agents-count = 1 -#windows-agents-build-queue = "windows-test" -#windows-agents-cpu-request = "6" -#windows-agents-mem-request = "8Gi" - -buildkite-api-token-readonly = "" -buildkite-agent-token = "" -conduit-api-token = "" -git-id-rsa = "" -id-rsa-pub = "" -git-known-hosts = "" \ No newline at end of file