Mikhail Goncharov 2024-01-29 14:59:11 +01:00
parent ff879c8fa1
commit ace53cf13b
24 changed files with 9 additions and 1204 deletions

View file

@@ -1,7 +0,0 @@
FROM python:3
RUN pip install flask gunicorn requests
ADD main.py /
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "main:app"]

View file

@@ -1,4 +0,0 @@
This is a small service that integrates Harbormaster and Buildkite.
It is located at http://build.llvm-merge-guard.org behind HTTP auth and is not
publicly accessible, as it is only used from Harbormaster.
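For reference, a build can be triggered with a plain HTTP POST to `/build`; every single-valued query parameter is forwarded to Buildkite as a `ph_`-prefixed environment variable (see `main.py` below). A minimal sketch, with placeholder credentials and revision number:

```python
# Hypothetical request to the proxy; the hostname is from the note above,
# the user/password pair and the revision number are placeholders.
import requests

response = requests.post(
    'http://build.llvm-merge-guard.org/build'
    '?buildable_revision=123456&scripts_refspec=main',
    auth=('user', 'password'),  # the service sits behind HTTP basic auth
)
# On success the service returns the web URL of the created Buildkite build.
print(response.text)
```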

View file

@@ -1,34 +0,0 @@
#!/bin/bash
# Copyright 2019 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -eux
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
ROOT_DIR="$(dirname "${DIR}")"
# get config options
IMAGE_NAME="phabricator-proxy"
docker build -t ${IMAGE_NAME} .
read -p "Push to registry? [yN]" -n 1 -r
echo
if [[ $REPLY =~ ^[Yy]$ ]]
then
source "${ROOT_DIR}/k8s_config"
QUALIFIED_NAME="${GCR_HOSTNAME}/${GCP_PROJECT}/${IMAGE_NAME}"
docker tag ${IMAGE_NAME} ${QUALIFIED_NAME}
docker push ${QUALIFIED_NAME}
fi

View file

@@ -1,72 +0,0 @@
from flask.logging import default_handler
from urllib.parse import urlparse, parse_qs
import flask
import json
import logging
import logging.handlers
import os
import requests
buildkite_api_token = os.getenv("BUILDKITE_API_TOKEN", "")
app = flask.Flask(__name__)
app.config["DEBUG"] = False
formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
errHandler = logging.FileHandler('error.log', encoding='utf-8',)
errHandler.setLevel(logging.ERROR)
errHandler.setFormatter(formatter)
app.logger.addHandler(errHandler)
rotatingHandler = logging.handlers.TimedRotatingFileHandler('info.log', when='D', encoding='utf-8', backupCount=8)
rotatingHandler.setFormatter(formatter)
app.logger.addHandler(rotatingHandler)
app.logger.setLevel(logging.INFO)
stdoutLog = logging.StreamHandler()
stdoutLog.setFormatter(formatter)
app.logger.addHandler(stdoutLog)
app.logger.removeHandler(default_handler)
@app.route('/', methods=['GET'])
def home():
return "Hi LLVM!"
@app.route('/build', methods=['POST', 'GET'])
def build():
app.logger.info('request: %s %s', flask.request, flask.request.url)
app.logger.info('headers: %s', flask.request.headers)
if flask.request.method == 'POST':
app.logger.info('data: %s', flask.request.data)
app.logger.info('form: %s', flask.request.form)
url = urlparse(flask.request.url)
params = parse_qs(url.query)
build_env = {}
for k, v in params.items():
if len(v) == 1:
build_env['ph_' + k] = v[0]
refspec = 'main'
if 'ph_scripts_refspec' in build_env:
refspec = build_env['ph_scripts_refspec']
build_request = {
'commit': 'HEAD',
'branch': refspec,
'env': build_env,
'message': f'D{build_env["ph_buildable_revision"]}',
}
app.logger.info('buildkite request: %s', build_request)
headers = {'Authorization': f'Bearer {buildkite_api_token}'}
response = requests.post(
'https://api.buildkite.com/v2/organizations/llvm-project'
'/pipelines/diff-checks/builds',
json=build_request,
headers=headers)
app.logger.info('buildkite response: %s %s', response.status_code, response.text)
rjs = json.loads(response.text)
return rjs['web_url']
else:
return "expected POST request"
if __name__ == '__main__':
app.run(host='0.0.0.0', port=8080)

View file

@@ -1,196 +0,0 @@
#!/usr/bin/env python3
import csv
import datetime
import hashlib
import json
import numpy
import requests
import os
import re
import sys
from typing import Optional, List, Dict
from urllib.parse import urljoin
class Stage:
def __init__(self, stage_dict: Dict):
self.name = stage_dict['name']
self.success = stage_dict['status'].lower() == 'success'
self.start_time = datetime.datetime.fromtimestamp(stage_dict['startTimeMillis']/1000)
self.duration = datetime.timedelta(milliseconds=stage_dict['durationMillis'])
class Build:
def __init__(self, job_name: str, build_dict: Dict):
self.job_name = job_name
self.number = build_dict['number']
self.result = build_dict['result']
self.start_time = datetime.datetime.fromtimestamp(build_dict['timestamp']/1000)
self.duration = datetime.timedelta(milliseconds=build_dict['duration'])
self.stages = [] # type: List[Stage]
self.agent = None # type: Optional[str]
@property
def hour(self) -> datetime.datetime:
return datetime.datetime(
year=self.start_time.year,
month=self.start_time.month,
day=self.start_time.day,
hour=self.start_time.hour,
)
@property
def day(self) -> datetime.datetime:
return datetime.datetime(
year=self.start_time.year,
month=self.start_time.month,
day=self.start_time.day,
)
@property
def success(self):
if self.result is None:
return False
return self.result.lower() == 'success'
def update_from_wfdata(self, wfdata: Dict):
self.stages = [Stage(s) for s in wfdata['stages']]
class JenkinsStatsReader:
_TMP_DIR = 'tmp/jenkins'
def __init__(self):
self.username = None # type: Optional[str]
self.password = None # type: Optional[str]
self.jenkins_url = None # type: Optional[str]
self.jobs = [] # type: List[str]
self.builds = {} # type: Dict[str, List[Build]]
self._read_config()
self._session = requests.session()
self._session.auth = (self.username, self.password)
def _read_config(self, credential_path='~/.llvm-premerge-checks/jenkins-creds.json'):
with open(os.path.expanduser(credential_path)) as credential_file:
config = json.load(credential_file)
self.username = config['username']
self.password = config['password']
self.jenkins_url = config['jenkins_url']
@property
def job_names(self) -> List[str]:
return list(self.builds.keys())
def get_data(self):
jobnames = self.fetch_jobsnames()
print('Found {} jobs: {}'.format(len(jobnames), jobnames))
self.get_builds(jobnames)
# self.get_workflow_data()
self.get_build_agents()
self.create_statistics('hour')
self.create_statistics('day')
self.write_all_builds()
def cached_get(self, url, as_json: bool = True):
m = hashlib.sha256()
m.update(url.encode('utf-8'))
filename = m.digest().hex()
cache_file = os.path.join(self._TMP_DIR, filename)
if os.path.isfile(cache_file):
with open(cache_file, 'r') as json_file:
if as_json:
return json.load(json_file)
return json_file.read()
response = self._session.get(urljoin(self.jenkins_url, url))
if response.status_code != 200:
if response.status_code == 404:
return None
raise IOError('Could not read data from {}:\n{}'.format(url, response.text))
os.makedirs(self._TMP_DIR, exist_ok=True)
with open(cache_file, 'w') as jenkins_data_file:
jenkins_data_file.write(response.text)
if as_json:
return response.json()
return response.text
def fetch_jobsnames(self) -> List[str]:
data = self.cached_get('api/json?tree=jobs[name]')
return [job['name'] for job in data['jobs']]
def get_builds(self, job_names):
for job_name in job_names:
print('Getting builds for: {}'.format(job_name))
build_data = self.cached_get('job/{}/api/json?tree=allBuilds[number,result,duration,timestamp,executor]'.format(job_name))
self.builds[job_name] = [Build(job_name, b) for b in build_data['allBuilds']]
print('{} has {} builds'.format(job_name, len(self.builds[job_name])))
def get_workflow_data(self):
print('Getting workflow data...')
for job_name, builds in self.builds.items():
for i, build in enumerate(builds):
wfdata = self.cached_get('job/{}/{}/wfapi/'.format(job_name, build.number))
build.update_from_wfdata(wfdata)
sys.stdout.write('\r{} [{}/{}]'.format(job_name, i, len(builds)))
sys.stdout.flush()
def get_build_agents(self):
print('Getting agent names...')
for job_name, builds in self.builds.items():
for i, build in enumerate(builds):
console_log = self.cached_get('job/{}/{}/consoleText'.format(job_name, build.number), as_json=False)
if console_log is None:
continue
match = re.search(r'Running on ([\w-]+) in', console_log)
if match:
build.agent = match.group(1)
sys.stdout.write('\r{} [{}/{}]'.format(job_name, i, len(builds)))
sys.stdout.flush()
def create_statistics(self, group_by: str):
for job_name, builds in self.builds.items():
print('Writing data for {}'.format(job_name))
# TODO: add success/failure rates
# all durations below are written in minutes
fieldnames = ['date', '# builds', 'median duration', 'p90 duration', 'p95 duration', 'max duration']
csv_file = open('tmp/jenkins_{}_{}.csv'.format(job_name, group_by), 'w')
writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel)
writer.writeheader()
build_hist = {}
for build in builds:
build_hist.setdefault(getattr(build, group_by), []).append(build)
for key in sorted(build_hist.keys()):
builds = build_hist[key] # type: List[Build]
durations = numpy.array([b.duration.seconds for b in builds])
writer.writerow({
'date': key,
'# builds': len(builds),
'median duration': numpy.median(durations)/60,
'p90 duration': numpy.percentile(durations, 90)/60,
'p95 duration': numpy.percentile(durations, 95)/60,
'max duration': numpy.max(durations)/60,
})
def write_all_builds(self):
fieldnames = ['date', 'job_name', 'build_number', 'duration', 'agent', 'success']
csv_file = open('tmp/jenkins_all_builds.csv', 'w')
writer = csv.DictWriter(csv_file, fieldnames=fieldnames, dialect=csv.excel)
writer.writeheader()
for job_name, builds in self.builds.items():
for build in builds:
writer.writerow({
'date': build.start_time,
'job_name': job_name,
'build_number': build.number,
'duration': build.duration.total_seconds()/60.0,
'agent': build.agent,
'success': build.success,
})
if __name__ == '__main__':
jsr = JenkinsStatsReader()
jsr.get_data()
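For reference, `_read_config` above expects `~/.llvm-premerge-checks/jenkins-creds.json` to provide the three keys it reads; a minimal sketch with placeholder values:

```json
{
  "username": "jenkins-user",
  "password": "jenkins-api-token",
  "jenkins_url": "https://jenkins.example.com/"
}
```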

View file

@@ -77,7 +77,7 @@ def download_job_artifacts(conn):
     c.execute(f"""
       select ja.meta from
       (select j.key,j.id job_id, a->>'id' aid, a as meta from jobs j, json_array_elements(j.meta->'artifacts') as a) as ja
       left join artifacts a on a.job_id = ja.job_id and a.id=ja.aid
       where a.id IS NULL""")
     total = c.rowcount
     logging.info(f'will download {total} artifacts')
@@ -207,7 +207,7 @@ def download_job_artifacts_list(conn):
     logging.info('download jobs artifact lists')
     with conn.cursor() as c:
         c.execute("""
             SELECT key, raw->>'artifacts_url', meta
             FROM jobs
             WHERE (meta->>'artifacts' IS NULL) AND (raw->>'artifacts_url' IS NOT NULL)""")
     cnt = 0
@@ -283,10 +283,10 @@ if __name__ == '__main__':
     logging.basicConfig(level='INFO', format='%(levelname)-7s %(message)s')
     cn = connect()
     logging.info('downloading buildkite data')
-    #insert_all_builds(cn)
-    insert_new_builds(cn)
-    update_running_builds(cn)
-    insert_new_jobs(cn)
-    download_job_artifacts_list(cn)
-    download_job_artifacts(cn)
-    download_job_logs(cn)
+    # insert_all_builds(cn)
+    # insert_new_builds(cn)
+    # update_running_builds(cn)
+    # insert_new_jobs(cn)
+    # download_job_artifacts_list(cn)
+    # download_job_artifacts(cn)
+    # download_job_logs(cn)

terraform/.gitignore
View file

@@ -1,37 +0,0 @@
# Local .terraform directories
**/.terraform/*
# .tflock files
.terraform.lock.hcl
# .tfstate files
*.tfstate
*.tfstate.*
# Crash log files
crash.log
crash.*.log
# Exclude all .tfvars files, which are likely to contain sensitive data, such as
# password, private keys, and other secrets. These should not be part of version
# control as they are data points which are potentially sensitive and subject
# to change depending on the environment.
*.tfvars
*.tfvars.json
# Ignore override files as they are usually used to override resources locally and so
# are not checked in
override.tf
override.tf.json
*_override.tf
*_override.tf.json
# Include override files you do wish to add to version control using negated pattern
# !example_override.tf
# Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
# example: *tfplan*
# Ignore CLI configuration files
.terraformrc
terraform.rc

View file

@@ -1,94 +0,0 @@
# Permissions
TODO
# Step 1: Bootstrap
Copy `variables.tfvars` from `variables.tfvars_example`.
Replace the placeholders for `project-id` and `billing-account` in `variables.tfvars`.
Insert secret values in the `variables.tfvars` file, or provide them at runtime when running `terraform plan` or `terraform apply`.
### Initialise Terraform with local backend
Comment out everything in the `backend.tf` file to use local state for the first run, since the bucket for storing remote state has not been created yet.
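With everything commented out, `backend.tf` (its contents are shown later in this commit) reads:
```
# terraform {
#   backend "gcs" {
#   }
# }
```
Then initialise: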
```
terraform -chdir=terraform init
```
### Create the state bucket
```
terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_storage_bucket.terraform_state"
```
To skip the confirmation prompt, use the `-auto-approve` flag.
## Migrate the state to the bucket.
Uncomment everything in the `backend.tf` file to use remote state with the newly created bucket.
```
export PROJECT_ID="<PROJECT_ID>"
```
```
terraform -chdir=terraform init -backend-config="bucket=terraform-state-${PROJECT_ID}" -backend-config="prefix=terraform/state"
```
## Create the secrets.
```
terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_secret_manager_secret.secret"
```
## Create the cluster.
Due to the problem described [here](https://github.com/hashicorp/terraform-provider-kubernetes/issues/1775), the Terraform Kubernetes provider requires the Kubernetes cluster to be created first. To create the cluster without applying the Kubernetes resources, the apply is done in two runs using the `-target` flag.
```
terraform -chdir=terraform apply -var-file=variables.tfvars -target="google_container_cluster.llvm_premerge_checks_cluster"
```
## Build the builders
To deploy build workers, you need the worker Docker images in your project.
TODO cloud build SA permissions
### Linux worker image
Execute cloud build to build Linux worker:
```
gcloud builds submit --config=containers/buildkite-premerge-debian/cloudbuild.yaml containers/buildkite-premerge-debian/ --project=${PROJECT_ID}
```
### Windows worker image
Build the Windows cloud builder by following the steps described here: [link](https://github.com/GoogleCloudPlatform/cloud-builders-community/tree/master/windows-builder)
Execute cloud build to build Windows worker:
```
gcloud builds submit --config=containers/buildkite-premerge-windows/cloudbuild.yaml containers/buildkite-premerge-windows/ --project=${PROJECT_ID}
```
## Create the rest of the GCP resources, including workers in Kubernetes pods
```
terraform -chdir=terraform apply -var-file="variables.tfvars"
```
## Terraform cloud build automation
Manual trigger:
```
gcloud builds submit --config=terraform/cloudbuild.yaml terraform --project=${PROJECT_ID} --substitutions=_GIT_REPO=${GIT_REPO}
```
Automatic trigger:
```
<TODO>
```
# Budget
Budget alerts are set on a monthly basis. Notification emails will be triggered at 50%, 90%, and 100% of your budget.
## Actions to reduce project costs
Adjust the GKE cluster nodes: set the node count to 1 per build platform (Linux and Windows). The node count is controlled from the `variables.tfvars` file via the `linux-agents-count` and `windows-agents-count` parameters; if they are not set there, check the defaults configured in the `variables.tf` file.
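For example, a minimal cost-reduction override in `variables.tfvars` might be (values illustrative; the defaults in `variables.tf` are 6 Linux and 8 Windows agents):
```
linux-agents-count   = 1
windows-agents-count = 1
```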
## Break glass procedure
For emergency manual configuration of the Kubernetes build nodes, use the following set of gcloud commands. It is recommended to execute them from the [Cloud Shell](https://cloud.google.com/shell/docs/using-cloud-shell) for simplicity. Please note that these commands assume you have the necessary permissions and authentication to access and modify the GKE cluster.
```
export PROJECT_ID=<your project id>
export ZONE="europe-west3-c"
gcloud container clusters get-credentials ${PROJECT_ID}-cluster --zone ${ZONE} --project ${PROJECT_ID}
#gcloud container clusters update llvm-premerge-checks-cluster --node-pool linux-agents --zone ${ZONE} --project ${PROJECT_ID} --no-enable-autoscaling
gcloud container clusters resize llvm-premerge-checks-cluster --node-pool linux-agents --num-nodes 1 --zone ${ZONE} --project ${PROJECT_ID}
#gcloud container clusters update llvm-premerge-checks-cluster --node-pool windows-agents --zone ${ZONE} --project ${PROJECT_ID} --no-enable-autoscaling
gcloud container clusters resize llvm-premerge-checks-cluster --node-pool windows-agents --num-nodes 1 --zone ${ZONE} --project ${PROJECT_ID}
```
These commands resize each node pool down to a single node for both the Linux and Windows agents; adjust the node count as needed. Please note that changing the GKE cluster configuration or scaling down nodes may impact the availability and performance of the pipeline.

View file

@@ -1,4 +0,0 @@
terraform {
backend "gcs" {
}
}

View file

@@ -1,49 +0,0 @@
#todo fix billing alert creation
#todo do not create billing if option in variables is off
resource "google_billing_budget" "budget" {
billing_account = data.google_project.current_project.billing_account
display_name = "budget"
amount {
specified_amount {
currency_code = "USD"
units = var.billing-budget
}
}
budget_filter {
projects = ["projects/${data.google_project.current_project.number}"]
credit_types_treatment = "EXCLUDE_ALL_CREDITS"
calendar_period = "MONTH"
#services = ["services/24E6-581D-38E5"] # Bigquery
}
threshold_rules {
threshold_percent = 0.5
}
threshold_rules {
threshold_percent = 0.9
}
threshold_rules {
threshold_percent = 1.0
}
#TODO add if not empty billing admins var. Else use default admins
all_updates_rule {
monitoring_notification_channels = [
for k, v in google_monitoring_notification_channel.notification_channel : google_monitoring_notification_channel.notification_channel[k].id
]
disable_default_iam_recipients = length(var.billing-admins) < 1 ? false : true
}
}
resource "google_monitoring_notification_channel" "notification_channel" {
for_each = var.billing-admins
display_name = each.key
type = "email"
labels = {
email_address = each.value
}
}

View file

@@ -1,57 +0,0 @@
steps:
- name: gcr.io/cloud-builders/git
args:
- '-c'
- 'git clone ${_GIT_REPO} repo --depth 1'
entrypoint: bash
- name: hashicorp/terraform
args:
- init
- '-backend-config=bucket=${_TF_BACKEND_BUCKET}'
- '-backend-config=prefix=${_TF_BACKEND_PREFIX}'
dir: repo/terraform
- name: hashicorp/terraform
args:
- plan
- '-var=project-id=${PROJECT_ID}'
- '-var=buildkite-api-token-readonly=$$BUILDKITE_API_TOKEN_READONLY'
- '-var=buildkite-agent-token=$$BUILDKITE_AGENT_TOKEN'
- '-var=conduit-api-token=$$CONDUIT_API_TOKEN'
- '-var=git-id-rsa=$$GIT_ID_RSA'
- '-var=id-rsa-pub=$$ID_RSA_PUB'
- '-var=git-known-hosts=$$GIT_KNOWN_HOSTS'
- '-out=/workspace/tfplan-${BUILD_ID}'
secretEnv:
- 'BUILDKITE_API_TOKEN_READONLY'
- 'BUILDKITE_AGENT_TOKEN'
- 'CONDUIT_API_TOKEN'
- 'GIT_ID_RSA'
- 'ID_RSA_PUB'
- 'GIT_KNOWN_HOSTS'
dir: repo/terraform
# - name: hashicorp/terraform
# args:
# - apply
# - '-auto-approve'
# - /workspace/tfplan-${BUILD_ID}
# dir: repo/terraform
substitutions:
_GIT_REPO: $(body.project.git_http_url)
_TF_BACKEND_BUCKET: 'terraform-state-${PROJECT_ID}'
_TF_BACKEND_PREFIX: terraform/state
availableSecrets:
secretManager:
- versionName: 'projects/${PROJECT_ID}/secrets/buildkite-api-token-readonly/versions/latest'
env: 'BUILDKITE_API_TOKEN_READONLY'
- versionName: 'projects/${PROJECT_ID}/secrets/buildkite-agent-token/versions/latest'
env: 'BUILDKITE_AGENT_TOKEN'
- versionName: 'projects/${PROJECT_ID}/secrets/conduit-api-token/versions/latest'
env: 'CONDUIT_API_TOKEN'
- versionName: 'projects/${PROJECT_ID}/secrets/git-id-rsa/versions/latest'
env: 'GIT_ID_RSA'
- versionName: 'projects/${PROJECT_ID}/secrets/id-rsa-pub/versions/latest'
env: 'ID_RSA_PUB'
- versionName: 'projects/${PROJECT_ID}/secrets/git-known-hosts/versions/latest'
env: 'GIT_KNOWN_HOSTS'
options:
dynamic_substitutions: true

View file

@@ -1,179 +0,0 @@
resource "google_service_account" "llvm_premerge_checks_sa" {
account_id = "llvm-premerge-checks-sa"
display_name = "Service Account used with the gke cluster"
}
resource "google_project_iam_binding" "sa_gcr_reader_role" {
project = var.project-id
role = "roles/storage.objectViewer"
members = [
"serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}"
]
}
resource "google_project_iam_binding" "sa_mertics_writer_role" {
project = var.project-id
role = "roles/monitoring.metricWriter"
members = [
"serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}"
]
}
resource "google_project_iam_binding" "sa_logging_writer_role" {
project = var.project-id
role = "roles/logging.logWriter"
members = [
"serviceAccount:${google_service_account.llvm_premerge_checks_sa.email}"
]
}
resource "google_container_cluster" "llvm_premerge_checks_cluster" {
name = "llvm-premerge-checks-cluster"
location = var.zone
network = google_compute_network.vpc_network.name
subnetwork = google_compute_subnetwork.vpc_subnetwork.name
#enable_autopilot = true
initial_node_count = 1
#TODO: redo
# master_authorized_networks_config {
# cidr_blocks {
# cidr_block= "0.0.0.0/0"
# display_name = "everyone"
# }
# }
private_cluster_config {
enable_private_nodes = true
enable_private_endpoint = false
master_ipv4_cidr_block = var.master-cidr #todo: var
}
ip_allocation_policy {
cluster_secondary_range_name = "pods"
services_secondary_range_name = "services"
}
depends_on = [google_project_service.google_api]
}
resource "google_container_node_pool" "linux_agents_nodepool" {
name = "linux-agents"
cluster = google_container_cluster.llvm_premerge_checks_cluster.id
node_config {
machine_type = var.linux-agents-machine-type
image_type = "cos_containerd"
disk_size_gb = 500
disk_type = "pd-ssd"
#todo: assign right permissions and use custom service account
service_account = google_service_account.llvm_premerge_checks_sa.email
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform"
]
}
autoscaling {
min_node_count = 0
max_node_count = var.linux-agents-count
location_policy = "BALANCED"
}
}
resource "google_container_node_pool" "windows_agents_nodepool" {
name = "windows-agents"
cluster = google_container_cluster.llvm_premerge_checks_cluster.id
node_config {
machine_type = var.windows-agents-machine-type
image_type = "windows_ltsc_containerd" # todo ltsc or sac ?
disk_size_gb = 500
disk_type = "pd-ssd"
#todo: assign right permissions and use custom service account
service_account = google_service_account.llvm_premerge_checks_sa.email
oauth_scopes = [
"https://www.googleapis.com/auth/cloud-platform"
]
}
autoscaling {
min_node_count = 1
max_node_count = var.windows-agents-count
location_policy = "BALANCED"
}
}
#todo recheck
data "google_client_config" "provider" {}
provider "kubernetes" {
host = "https://${google_container_cluster.llvm_premerge_checks_cluster.endpoint}"
token = data.google_client_config.provider.access_token
cluster_ca_certificate = base64decode(
google_container_cluster.llvm_premerge_checks_cluster.master_auth[0].cluster_ca_certificate,
)
}
resource "kubernetes_manifest" "buildkite_namespace" {
manifest = yamldecode(templatefile("kubernetes/namespace.yaml", {}))
}
resource "kubernetes_manifest" "buildkite_agent_token_secret" {
manifest = yamldecode(templatefile("kubernetes/secret-buildkite-token.yaml", { buildkite-agent-token = var.buildkite-agent-token }))
depends_on = [kubernetes_manifest.buildkite_namespace]
}
resource "kubernetes_manifest" "buildkite_api_token_readonly_secret" {
manifest = yamldecode(templatefile("kubernetes/secret-buildkite-token-readonly.yaml", { buildkite-api-token-readonly = var.buildkite-api-token-readonly }))
depends_on = [kubernetes_manifest.buildkite_namespace]
}
resource "kubernetes_manifest" "buildkite_github_secret" {
manifest = yamldecode(templatefile("kubernetes/secret-github-ssh.yaml", { git-id-rsa = var.git-id-rsa, id-rsa-pub = var.id-rsa-pub, git-known-hosts = var.git-known-hosts }))
depends_on = [kubernetes_manifest.buildkite_namespace]
}
resource "kubernetes_manifest" "buildkite_conduit_api_token_secret" {
manifest = yamldecode(templatefile("kubernetes/secret-conduit-token.yaml", { conduit-api-token = var.conduit-api-token }))
depends_on = [kubernetes_manifest.buildkite_namespace]
}
resource "kubernetes_manifest" "buildkite_linux_agent" {
manifest = yamldecode(templatefile("kubernetes/linux-agents.yaml", {
project-id = var.project-id,
gke-nodepool = google_container_node_pool.linux_agents_nodepool.name,
build-queue = var.linux-agents-build-queue,
cpu-request = var.linux-agents-cpu-request,
mem-request = var.linux-agents-mem-request,
replicas-count = var.linux-agents-count,
}))
depends_on = [kubernetes_manifest.buildkite_namespace]
# wait {
# fields = {
# "status.phase" = "Running"
# }
# }
}
resource "kubernetes_manifest" "buildkite_windows_agent" {
manifest = yamldecode(templatefile("kubernetes/windows-agents.yaml", {
project-id = var.project-id,
gke-nodepool = google_container_node_pool.windows_agents_nodepool.name,
build-queue = var.windows-agents-build-queue,
cpu-request = var.windows-agents-cpu-request,
mem-request = var.windows-agents-mem-request,
replicas-count = var.windows-agents-count,
}))
depends_on = [kubernetes_manifest.buildkite_namespace]
# wait {
# fields = {
# "status.phase" = "Running"
# }
# }
}

View file

@@ -1,82 +0,0 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: Deployment
metadata:
name: linux-agents
namespace: buildkite
spec:
replicas: ${replicas-count}
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 50%
type: RollingUpdate
selector:
matchLabels:
app: agent-premerge-debian
template:
metadata:
labels:
app: agent-premerge-debian
spec:
containers:
- name: buildkite-premerge-debian
image: gcr.io/${project-id}/buildkite-premerge-debian:latest
resources:
limits:
cpu: ${cpu-request}
memory: ${mem-request}
requests:
cpu: ${cpu-request}
memory: ${mem-request}
volumeMounts:
- name: github-ssh
mountPath: /mnt/ssh
- name: workdir
mountPath: /var/lib/buildkite-agent
env:
- name: BUILDKITE_AGENT_TOKEN
valueFrom:
secretKeyRef:
name: buildkite-agent-token
key: token
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: BUILDKITE_AGENT_TAGS
value: "queue=${build-queue},name=$(POD_NAME),project=${project-id}"
- name: BUILDKITE_BUILD_PATH
value: "/var/lib/buildkite-agent/builds"
- name: CONDUIT_TOKEN
valueFrom:
secretKeyRef:
name: conduit-api-token
key: token
- name: BUILDKITE_API_TOKEN
valueFrom:
secretKeyRef:
name: buildkite-api-token-readonly
key: token
volumes:
- name: github-ssh
secret:
secretName: github-ssh
- name: workdir
emptyDir: {}
nodeSelector:
cloud.google.com/gke-nodepool: ${gke-nodepool}
terminationGracePeriodSeconds: 60

View file

@@ -1,4 +0,0 @@
apiVersion: v1
kind: Namespace
metadata:
name: buildkite

View file

@@ -1,8 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: buildkite-api-token-readonly
namespace: buildkite
type: Opaque
data:
token: ${buildkite-api-token-readonly}

View file

@@ -1,8 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: buildkite-agent-token
namespace: buildkite
type: Opaque
data:
token: ${buildkite-agent-token}

View file

@@ -1,8 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: conduit-api-token
namespace: buildkite
type: Opaque
data:
token: ${conduit-api-token}

View file

@@ -1,10 +0,0 @@
apiVersion: v1
kind: Secret
metadata:
name: github-ssh
namespace: buildkite
type: Opaque
data:
id_rsa: ${git-id-rsa}
id_rsa.pub: ${id-rsa-pub}
known_hosts: ${git-known-hosts}

View file

@@ -1,79 +0,0 @@
# Copyright 2022 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
apiVersion: apps/v1
kind: Deployment
metadata:
name: windows-agents
namespace: buildkite
spec:
replicas: ${replicas-count}
strategy:
type: Recreate
selector:
matchLabels:
app: agent-premerge-windows
template:
metadata:
labels:
app: agent-premerge-windows
spec:
containers:
- name: buildkite-premerge-windows
image: gcr.io/${project-id}/buildkite-premerge-windows:latest
resources:
limits:
cpu: ${cpu-request}
memory: ${mem-request}
requests:
cpu: ${cpu-request}
memory: ${mem-request}
# volumeMounts:
# - name: workdir
# mountPath: "C:\\ws"
env:
- name: buildkiteAgentToken
valueFrom:
secretKeyRef:
name: buildkite-agent-token
key: token
- name: POD_NAME
valueFrom:
fieldRef:
fieldPath: metadata.name
- name: BUILDKITE_AGENT_TAGS
value: "queue=${build-queue},name=$(POD_NAME),project=${project-id}"
# - name: BUILDKITE_BUILD_PATH
# value: "C:\\ws"
- name: CONDUIT_TOKEN
valueFrom:
secretKeyRef:
name: conduit-api-token
key: token
- name: BUILDKITE_API_TOKEN
valueFrom:
secretKeyRef:
name: buildkite-api-token-readonly
key: token
# volumes:
# - name: workdir
# emptyDir: {}
nodeSelector:
cloud.google.com/gke-nodepool: ${gke-nodepool}
tolerations:
- key: "node.kubernetes.io/os"
operator: "Equal"
value: "windows"
effect: "NoSchedule"
terminationGracePeriodSeconds: 60

View file

@@ -1,92 +0,0 @@
#todo automatically rebuild buildkite images
data "google_project" "current_project" {
project_id = var.project-id
}
locals {
cloud_build_sa_roles = ["roles/editor", "roles/storage.objectAdmin", "roles/secretmanager.secretAccessor", "roles/secretmanager.viewer", "roles/resourcemanager.projectIamAdmin"]
enabled_apis = [
"secretmanager.googleapis.com",
"billingbudgets.googleapis.com",
"cloudbuild.googleapis.com",
"compute.googleapis.com",
"container.googleapis.com",
"cloudresourcemanager.googleapis.com",
"cloudbilling.googleapis.com"
]
}
#todo create separate sa for cloud build
# data "google_iam_policy" "cloud_build_sa" {
# binding {
# role = "roles/iam.serviceAccountUser"
# members = [
# "serviceAccount:${data.google_project.current_project.number}-compute@developer.gserviceaccount.com",
# ]
# }
# }
# resource "google_service_account_iam_policy" "admin-account-iam" {
# service_account_id = "${data.google_project.current_project.id}/serviceAccounts/${data.google_project.current_project.number}@cloudbuild.gserviceaccount.com"
# policy_data = data.google_iam_policy.cloud_build_sa.policy_data
# }
resource "google_project_iam_member" "cloudbuild_sa_roles" {
project = var.project-id
for_each = toset(local.cloud_build_sa_roles)
role = each.value
member = "serviceAccount:${data.google_project.current_project.number}@cloudbuild.gserviceaccount.com"
}
resource "google_project_service" "google_api" {
for_each = toset(local.enabled_apis)
service = each.value
}
resource "google_storage_bucket" "terraform_state" {
name = "terraform-state-${var.project-id}"
uniform_bucket_level_access = true
location = "EU"
depends_on = [google_project_service.google_api]
}
resource "google_compute_network" "vpc_network" {
name = "vpc-network"
auto_create_subnetworks = false
}
resource "google_compute_subnetwork" "vpc_subnetwork" {
name = "subnetwork"
ip_cidr_range = var.subnetwork-main-cidr
region = var.region
network = google_compute_network.vpc_network.id
secondary_ip_range {
range_name = "pods"
ip_cidr_range = var.subnetwork-pods-cidr
}
secondary_ip_range {
range_name = "services"
ip_cidr_range = var.subnetwork-services-cidr
}
}
resource "google_compute_router" "router" {
name = "router"
region = google_compute_subnetwork.vpc_subnetwork.region
network = google_compute_network.vpc_network.id
bgp {
asn = 64514 #todo recheck
}
}
resource "google_compute_router_nat" "nat" {
name = "router-nat"
router = google_compute_router.router.name
region = google_compute_router.router.region
nat_ip_allocate_option = "AUTO_ONLY"
source_subnetwork_ip_ranges_to_nat = "ALL_SUBNETWORKS_ALL_IP_RANGES"
}

View file

@@ -1,7 +0,0 @@
provider "google" {
project = var.project-id
region = var.region
zone = var.zone
billing_project = var.project-id
user_project_override = true
}

View file

@@ -1,26 +0,0 @@
locals {
secrets = {
"buildkite-api-token-readonly": var.buildkite-api-token-readonly,
"buildkite-agent-token": var.buildkite-agent-token,
"conduit-api-token": var.conduit-api-token,
"git-id-rsa": var.git-id-rsa,
"id-rsa-pub": var.id-rsa-pub,
"git-known-hosts": var.git-known-hosts
}
}
resource "google_secret_manager_secret" "secret" {
for_each = local.secrets
secret_id = each.key
replication {
automatic = true
}
depends_on = [google_project_service.google_api]
}
resource "google_secret_manager_secret_version" "secret_version" {
for_each = local.secrets
secret = google_secret_manager_secret.secret[each.key].id
secret_data = each.value
}

View file

@@ -1,117 +0,0 @@
variable "project-id" {
type = string
}
variable "billing-budget" {
type = number
default = "25000"
}
variable "billing-admins" {
type = map(any)
default = {}
}
variable "region" {
type = string
default = "europe-west3"
}
variable "zone" {
type = string
default = "europe-west3-c"
}
variable "subnetwork-main-cidr" {
type = string
default = "10.0.0.0/16"
}
variable "master-cidr" {
type = string
default = "10.1.0.0/28"
}
variable "subnetwork-pods-cidr" {
type = string
default = "10.2.0.0/16"
}
variable "subnetwork-services-cidr" {
type = string
default = "10.3.0.0/16"
}
variable "linux-agents-machine-type" {
type = string
default = "e2-standard-32"
}
variable "linux-agents-count" {
type = number
default = 6
}
variable "linux-agents-build-queue" {
type = string
default = "linux"
}
variable "linux-agents-cpu-request" {
type = string
default = "30"
}
variable "linux-agents-mem-request" {
type = string
default = "80Gi"
}
variable "windows-agents-machine-type" {
type = string
default = "c2-standard-16"
}
variable "windows-agents-count" {
type = number
default = 8
}
variable "windows-agents-build-queue" {
type = string
default = "windows"
}
variable "windows-agents-cpu-request" {
type = string
default = "15"
}
variable "windows-agents-mem-request" {
type = string
default = "60Gi"
}
variable "buildkite-api-token-readonly" {
type = string
}
variable "buildkite-agent-token" {
type = string
}
variable "conduit-api-token" {
type = string
}
variable "git-id-rsa" {
type = string
}
variable "id-rsa-pub" {
type = string
}
variable "git-known-hosts" {
type = string
}

View file

@@ -1,21 +0,0 @@
project-id = ""
#billing-admins = {"test": "test@test.com"}
#linux-agents-machine-type = "e2-standard-8"
#linux-agents-count = 1
#linux-agents-build-queue = "linux-test"
#linux-agents-cpu-request = "6"
#linux-agents-mem-request = "8Gi"
#windows-agents-machine-type = "e2-standard-8"
#windows-agents-count = 1
#windows-agents-build-queue = "windows-test"
#windows-agents-cpu-request = "6"
#windows-agents-mem-request = "8Gi"
buildkite-api-token-readonly = ""
buildkite-agent-token = ""
conduit-api-token = ""
git-id-rsa = ""
id-rsa-pub = ""
git-known-hosts = ""