1
0
Fork 0

Add graceful termination period for linux agents

Now builds will not be dropped on cluster upgrades. That requires
container updates.
This commit is contained in:
Mikhail Goncharov 2021-05-14 14:54:57 +02:00
parent 134ca4b801
commit e703a856cb
7 changed files with 173 additions and 21 deletions

View file

@ -5,9 +5,11 @@ RUN echo 'install buildkite' ;\
sh -c 'echo deb https://apt.buildkite.com/buildkite-agent stable main > /etc/apt/sources.list.d/buildkite-agent.list' ;\
apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 32A37959C2FA5C3C99EFBC32A79206696452D198 ;\
apt-get update ;\
apt-get install -y buildkite-agent; \
apt-get install -y buildkite-agent tini; \
apt-get clean;
COPY *.sh /usr/local/bin/
RUN chmod og+rx /usr/local/bin/*.sh
COPY --chown=buildkite-agent:buildkite-agent pre-checkout /etc/buildkite-agent/hooks
CMD ["start_agent.sh"]
ENTRYPOINT ["entrypoint.sh"]
CMD ["buildkite-agent", "start", "--no-color"]

View file

@ -1,5 +1,6 @@
#!/usr/bin/env bash
# Copyright 2020 Google LLC
# Copyright 2021 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
@ -12,11 +13,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -euo pipefail
# Buildkite installation creates 'buildkite-agent' user.
USER=buildkite-agent
# prepare work directory
mkdir -p "${BUILDKITE_BUILD_PATH}"
chown -R ${USER}:${USER} "${BUILDKITE_BUILD_PATH}"
@ -26,12 +25,8 @@ mkdir -p "${CCACHE_DIR}"
chown -R ${USER}:${USER} "${CCACHE_DIR}"
# /mnt/ssh should contain known_hosts, id_rsa and id_rsa.pub .
mkdir -p /var/lib/buildkite-agent/.ssh
cp /mnt/ssh/* /var/lib/buildkite-agent/.ssh
chmod 700 /var/lib/buildkite-agent/.ssh
chmod 600 /var/lib/buildkite-agent/.ssh/*
chown -R $USER:$USER /var/lib/buildkite-agent/.ssh
su buildkite-agent -c "buildkite-agent start"
echo "agent exited"
sleep 10m
# Install the SSH material mounted at /mnt/ssh into the current user's
# ~/.ssh with the restrictive permissions ssh expects
# (directory 700, files 600).
mkdir -p ~/.ssh
chmod 700 ~/.ssh
cp /mnt/ssh/* ~/.ssh
chmod 600 ~/.ssh/*
# Replace this shell with tini, which runs the container command as its child.
# -g makes tini forward signals to the child's whole process group.
# "$@" is quoted so every argument is passed through exactly as received,
# without word splitting or glob expansion.
exec /usr/bin/tini -g -- "$@"

View file

@ -190,7 +190,7 @@ Most commonly used are:
- `ph_projects`: which projects to use, "detect" will look on diff to infer the projects, "default" selects all projects.
- `ph_notify_email`: comma-separated list of email addresses to be notified when build is complete.
- `ph_log_level` ("DEBUG", "INFO", "WARNING" (default) or "ERROR"): log level for build scripts.
- `ph_linux_agents`, `ph_windows_agents`: custom JSON constraints on agents. For example, you might move one machine into a custom queue if it's erroneous and send jobs to it with `ph_windows_agents="{{\"queue\": \"custom\"}}"`.
- `ph_linux_agents`, `ph_windows_agents`: custom JSON constraints on agents. For example, you might move one machine into a custom queue if it's erroneous and send jobs to it with `ph_windows_agents={"queue": "custom"}`.
- `ph_skip_linux`, `ph_skip_windows` (if set to any value): skip build on this OS.
- `ph_skip_generated`: don't run custom steps generated from within llvm-project.

View file

@ -0,0 +1,80 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deployment "linux-agents-test": one replica of the buildkite-premerge-debian
# image in the "buildkite" namespace, tagged for the "linux-test" queue and
# scheduled onto the "linux-agents" GKE node pool.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: linux-agents-test
  namespace: buildkite
spec:
  replicas: 1
  # NOTE(review): the service-agents-test Deployment uses the same `app` label
  # in this namespace; Kubernetes recommends non-overlapping selectors.
  # Confirm whether a unique label is wanted here.
  selector:
    matchLabels:
      app: agent-premerge-debian
  template:
    metadata:
      labels:
        app: agent-premerge-debian
    spec:
      containers:
        - name: buildkite-premerge-debian
          image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian:latest
          # requests == limits for both cpu and memory.
          resources:
            limits:
              cpu: 15
              memory: 50Gi
            requests:
              cpu: 15
              memory: 50Gi
          volumeMounts:
            - name: ssd
              mountPath: /mnt/disks/ssd0
            # SSH files from the github-ssh secret.
            - name: github-ssh
              mountPath: /mnt/ssh
          env:
            - name: BUILDKITE_AGENT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: buildkite-agent-token
                  key: token
            # POD_NAME must be declared before BUILDKITE_AGENT_TAGS, which
            # references it through $(POD_NAME).
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: BUILDKITE_AGENT_TAGS
              value: "queue=linux-test,name=$(POD_NAME)"
            # Build directory lives on the mounted SSD volume.
            - name: BUILDKITE_BUILD_PATH
              value: "/mnt/disks/ssd0/agent"
            - name: CONDUIT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: conduit-api-token
                  key: token
            - name: BUILDKITE_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: buildkite-api-token-readonly
                  key: token
      volumes:
        - name: ssd
          hostPath:
            # directory location on host
            path: /mnt/disks/ssd0
            # "Directory" requires the path to already exist on the node.
            type: Directory
        - name: github-ssh
          secret:
            secretName: github-ssh
      nodeSelector:
        cloud.google.com/gke-nodepool: linux-agents
      # Allow up to one hour between the termination signal and a forced kill.
      terminationGracePeriodSeconds: 3600

View file

@ -22,7 +22,7 @@ spec:
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
maxUnavailable: 50%
type: RollingUpdate
selector:
matchLabels:
@ -34,7 +34,7 @@ spec:
spec:
containers:
- name: buildkite-premerge-debian
image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian
image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian:stable
resources:
limits:
cpu: 30
@ -81,4 +81,5 @@ spec:
secret:
secretName: github-ssh
nodeSelector:
cloud.google.com/gke-nodepool: linux-agents
cloud.google.com/gke-nodepool: linux-agents
terminationGracePeriodSeconds: 3600

View file

@ -0,0 +1,73 @@
# Copyright 2021 Google LLC
#
# Licensed under the Apache License v2.0 with LLVM Exceptions (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://llvm.org/LICENSE.txt
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Deployment "service-agents-test": one replica of the buildkite-premerge-debian
# image in the "buildkite" namespace, tagged for the "service-test" queue and
# scheduled onto the "default-pool" GKE node pool.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: service-agents-test
  namespace: buildkite
spec:
  replicas: 1
  # NOTE(review): the linux-agents-test Deployment uses the same `app` label
  # in this namespace; Kubernetes recommends non-overlapping selectors.
  # Confirm whether a unique label is wanted here.
  selector:
    matchLabels:
      app: agent-premerge-debian
  template:
    metadata:
      labels:
        app: agent-premerge-debian
    spec:
      containers:
        - name: buildkite-premerge-debian
          image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian:latest
          # The cpu request (1.5) is lower than the limit (2).
          resources:
            limits:
              cpu: 2
              memory: 5Gi
            requests:
              cpu: 1.5
              memory: 5Gi
          volumeMounts:
            # SSH files from the github-ssh secret.
            - name: github-ssh
              mountPath: /mnt/ssh
          env:
            - name: BUILDKITE_AGENT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: buildkite-agent-token
                  key: token
            # POD_NAME must be declared before BUILDKITE_AGENT_TAGS, which
            # references it through $(POD_NAME).
            - name: POD_NAME
              valueFrom:
                fieldRef:
                  fieldPath: metadata.name
            - name: BUILDKITE_AGENT_TAGS
              value: "queue=service-test,name=$(POD_NAME)"
            - name: BUILDKITE_BUILD_PATH
              value: "/var/lib/buildkite-agent/builds"
            - name: CONDUIT_TOKEN
              valueFrom:
                secretKeyRef:
                  name: conduit-api-token
                  key: token
            - name: BUILDKITE_API_TOKEN
              valueFrom:
                secretKeyRef:
                  name: buildkite-api-token-readonly
                  key: token
      volumes:
        - name: github-ssh
          secret:
            secretName: github-ssh
      nodeSelector:
        cloud.google.com/gke-nodepool: default-pool
      # Allow up to 20 minutes between the termination signal and a forced kill.
      terminationGracePeriodSeconds: 1200

View file

@ -34,7 +34,7 @@ spec:
spec:
containers:
- name: buildkite-premerge-debian
image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian
image: gcr.io/llvm-premerge-checks/buildkite-premerge-debian:stable
resources:
limits:
cpu: 2
@ -72,4 +72,5 @@ spec:
secret:
secretName: github-ssh
nodeSelector:
cloud.google.com/gke-nodepool: default-pool
cloud.google.com/gke-nodepool: default-pool
terminationGracePeriodSeconds: 1200