Skip to content

Commit 13e6708

Browse files
authored
Enable ARM instances on Cortex (#2268)
1 parent f0948be commit 13e6708

File tree

30 files changed

+363
-194
lines changed

30 files changed

+363
-194
lines changed

.circleci/config.yml

+5-1
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,7 @@ jobs:
121121
- checkout
122122
- setup_remote_docker
123123
- install-go
124-
- run: sudo apt update && sudo apt install parallel -y
124+
- run: docker buildx create --name builder --platform linux/amd64,linux/arm64 --use && docker buildx inspect --bootstrap
125125
- run: sudo pip install awscli
126126
- run: make ci-build-cli
127127
- return-if-not-deployed-branch
@@ -174,6 +174,10 @@ jobs:
174174
instance_type: inf1.xlarge
175175
min_instances: 1
176176
max_instances: 2
177+
- name: arm
178+
instance_type: a1.large
179+
min_instances: 1
180+
max_instances: 2
177181
EOF
178182
- run:
179183
name: Create/Update AWS User policy

Makefile

+7-1
Original file line numberDiff line numberDiff line change
@@ -127,17 +127,24 @@ async-gateway-update:
127127
# docker images
128128
images-all:
129129
@./dev/registry.sh update all
130+
images-all-multi-arch:
131+
@./dev/registry.sh update all --include-arm64-arch
130132
images-all-skip-push:
131133
@./dev/registry.sh update all --skip-push
132134

133135
images-dev:
134136
@./dev/registry.sh update dev
137+
images-dev-multi-arch:
138+
@./dev/registry.sh update dev --include-arm64-arch
135139
images-dev-skip-push:
136140
@./dev/registry.sh update dev --skip-push
137141

138142
images-manager-skip-push:
139143
@./dev/registry.sh update-single manager --skip-push
140144

145+
images-clean-cache:
146+
@./dev/registry.sh clean-cache
147+
141148
registry-create:
142149
@./dev/registry.sh create
143150

@@ -151,7 +158,6 @@ tools:
151158
@go get -u -v github.com/kyoh86/looppointer/cmd/looppointer
152159
@go get -u -v github.com/VojtechVitek/rerun/cmd/rerun
153160
@go get -u -v github.com/go-delve/delve/cmd/dlv
154-
@if [[ "$$OSTYPE" == "darwin"* ]]; then brew install parallel; elif [[ "$$OSTYPE" == "linux"* ]]; then sudo apt-get install -y parallel; else echo "your operating system is not supported"; fi
155161
@python3 -m pip install aiohttp black 'pydoc-markdown>=3.0.0,<4.0.0' boto3 pyyaml
156162
@python3 -m pip install -e test/e2e
157163

build/build-image.sh

+2-1
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,9 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null && pwd)"
2222
CORTEX_VERSION=master
2323

2424
image=$1
25+
# Comma-separated buildx target platforms; falls back to linux/amd64 so the
# former single-argument invocation (image name only) keeps working.
platforms=${2:-linux/amd64}
2526

2627
if [ "$image" == "inferentia" ]; then
2728
aws ecr get-login-password --region us-west-2 | docker login --username AWS --password-stdin 790709498068.dkr.ecr.us-west-2.amazonaws.com
2829
fi
29-
docker build "$ROOT" -f $ROOT/images/$image/Dockerfile -t quay.io/cortexlabs/${image}:${CORTEX_VERSION} -t cortexlabs/${image}:${CORTEX_VERSION}
30+
# Build the image for the requested platforms. Paths and the platform list are
# quoted so a checkout directory containing spaces is not word-split.
docker buildx build "$ROOT" -f "$ROOT/images/$image/Dockerfile" -t "quay.io/cortexlabs/${image}:${CORTEX_VERSION}" -t "cortexlabs/${image}:${CORTEX_VERSION}" --platform "$platforms"

build/build-images.sh

+7-8
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,10 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null && pwd)"
2222
source $ROOT/build/images.sh
2323
source $ROOT/dev/util.sh
2424

25-
# if parallel utility is installed, the docker build commands will be parallelized
26-
if command -v parallel &> /dev/null && [ -n "${NUM_BUILD_PROCS+set}" ] && [ "$NUM_BUILD_PROCS" != "1" ]; then
27-
ROOT=$ROOT SHELL=$(type -p /bin/bash) parallel --will-cite --halt now,fail=1 --eta --jobs $NUM_BUILD_PROCS $ROOT/build/build-image.sh {} ::: "${all_images[@]}"
28-
else
29-
for image in "${all_images[@]}"; do
30-
$ROOT/build/build-image.sh $image
31-
done
32-
fi
25+
for image in "${all_images[@]}"; do
26+
platforms="linux/amd64"
27+
if in_array $image "multi_arch_images"; then
28+
platforms+=",linux/arm64"
29+
fi
30+
$ROOT/build/build-image.sh $image $platforms
31+
done

build/generate_ami_mapping.go

+9-4
Original file line numberDiff line numberDiff line change
@@ -235,11 +235,15 @@ func main() {
235235
fmt.Print(region)
236236
sess := session.New(&aws.Config{Region: aws.String(region)})
237237
svc := ec2.New(sess)
238-
cpuAMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-node-%s-v*", k8sVersion))
238+
cpuAmd64AMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-node-%s-v*", k8sVersion))
239239
if err != nil {
240240
log.Fatal(err.Error())
241241
}
242-
acceleratedAMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-gpu-node-%s-v*", k8sVersion))
242+
cpuArm64AMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-arm64-node-%s-v*", k8sVersion))
243+
if err != nil {
244+
log.Fatal(err.Error())
245+
}
246+
acceleratedAmd64AMI, err := FindImage(svc, EKSResourceAccountID(region), fmt.Sprintf("amazon-eks-gpu-node-%s-v*", k8sVersion))
243247
if err != nil {
244248
log.Fatal(err.Error())
245249
}
@@ -248,8 +252,9 @@ func main() {
248252
k8sVersionMap[k8sVersion][region] = map[string]string{}
249253
}
250254
k8sVersionMap[k8sVersion][region] = map[string]string{
251-
"cpu": cpuAMI,
252-
"accelerated": acceleratedAMI,
255+
"cpu_amd64": cpuAmd64AMI,
256+
"cpu_arm64": cpuArm64AMI,
257+
"accelerated_amd64": acceleratedAmd64AMI,
253258
}
254259
fmt.Println(" ✓")
255260
}

build/images.sh

+12
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,18 @@ non_dev_images=(
5050
"kubexit"
5151
)
5252

53+
# for linux/amd64 and linux/arm64
54+
multi_arch_images=(
55+
"proxy"
56+
"async-gateway"
57+
"enqueuer"
58+
"dequeuer"
59+
"fluent-bit"
60+
"prometheus-node-exporter"
61+
"kube-rbac-proxy"
62+
"kubexit"
63+
)
64+
5365
all_images=(
5466
"${dev_images[@]}"
5567
"${non_dev_images[@]}"

build/push-image.sh

+4-1
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,13 @@
1717

1818
set -euo pipefail
1919

20+
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null && pwd)"
21+
2022
CORTEX_VERSION=master
2123

2224
host=$1
2325
image=$2
26+
# Comma-separated buildx target platforms; falls back to linux/amd64 so the
# former two-argument invocation (host, image) does not abort under `set -u`.
platforms=${3:-linux/amd64}
2427

2528
echo "$DOCKER_PASSWORD" | docker login -u "$DOCKER_USERNAME" --password-stdin
26-
docker push $host/cortexlabs/${image}:${CORTEX_VERSION}
29+
# Build for the requested platforms and push straight to the registry.
# The image reference must be passed with -t; without the flag buildx sees a
# second positional argument and refuses to run.
docker buildx build "$ROOT" -f "$ROOT/images/$image/Dockerfile" -t "$host/cortexlabs/${image}:${CORTEX_VERSION}" --platform "$platforms" --push

build/push-images.sh

+7-8
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,10 @@ source $ROOT/dev/util.sh
2424

2525
host=$1
2626

27-
# if parallel utility is installed, the docker push commands will be parallelized
28-
if command -v parallel &> /dev/null && [ -n "${NUM_BUILD_PROCS+set}" ] && [ "$NUM_BUILD_PROCS" != "1" ]; then
29-
ROOT=$ROOT DOCKER_USERNAME=$DOCKER_USERNAME DOCKER_PASSWORD=$DOCKER_PASSWORD SHELL=$(type -p /bin/bash) parallel --will-cite --halt now,fail=1 --eta --jobs $NUM_BUILD_PROCS $ROOT/build/push-image.sh $host {} ::: "${all_images[@]}"
30-
else
31-
for image in "${all_images[@]}"; do
32-
$ROOT/build/push-image.sh $host $image
33-
done
34-
fi
27+
for image in "${all_images[@]}"; do
28+
platforms="linux/amd64"
29+
if in_array $image "multi_arch_images"; then
30+
platforms+=",linux/arm64"
31+
fi
32+
$ROOT/build/push-image.sh $host $image $platforms
33+
done

dev/registry.sh

+42-61
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")"/.. >/dev/null && pwd)"
2323
source $ROOT/build/images.sh
2424
source $ROOT/dev/util.sh
2525

26-
images_with_builders="operator proxy async-gateway enqueuer dequeuer controller-manager"
26+
images_that_can_run_locally="operator manager"
2727

2828
if [ -f "$ROOT/dev/config/env.sh" ]; then
2929
source $ROOT/dev/config/env.sh
@@ -33,6 +33,7 @@ AWS_ACCOUNT_ID=${AWS_ACCOUNT_ID:-}
3333
AWS_REGION=${AWS_REGION:-}
3434

3535
skip_push="false"
36+
include_arm64_arch="false"
3637
positional_args=()
3738
while [[ $# -gt 0 ]]; do
3839
key="$1"
@@ -41,6 +42,10 @@ while [[ $# -gt 0 ]]; do
4142
skip_push="true"
4243
shift
4344
;;
45+
--include-arm64-arch)
46+
include_arm64_arch="true"
47+
shift
48+
;;
4449
*)
4550
positional_args+=("$1")
4651
shift
@@ -105,59 +110,55 @@ function create_ecr_repository() {
105110

106111
### HELPERS ###
107112

108-
function build() {
109-
local image=$1
110-
local tag=$2
111-
local dir="${ROOT}/images/${image}"
112-
113-
tag_args=""
114-
if [ -n "$AWS_ACCOUNT_ID" ] && [ -n "$AWS_REGION" ]; then
115-
tag_args+=" -t $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/cortexlabs/$image:$tag"
116-
fi
117-
118-
blue_echo "Building $image:$tag..."
119-
docker build $ROOT -f $dir/Dockerfile -t cortexlabs/$image:$tag $tag_args
120-
green_echo "Built $image:$tag\n"
121-
}
122-
123-
function cache_builder() {
113+
function build_and_push() {
124114
local image=$1
115+
local include_arm64_arch=$2
125116
local dir="${ROOT}/images/${image}"
126117

127-
blue_echo "Building $image-builder..."
128-
docker build $ROOT -f $dir/Dockerfile -t cortexlabs/$image-builder:$CORTEX_VERSION --target builder
129-
green_echo "Built $image-builder\n"
130-
}
118+
set -euo pipefail
131119

132-
function push() {
133-
if [ "$skip_push" = "true" ]; then
134-
return
120+
if ! in_array $image "multi_arch_images"; then
121+
include_arm64_arch="false"
135122
fi
136123

124+
if [ ! -n "$AWS_ACCOUNT_ID" ] || [ ! -n "$AWS_REGION" ]; then
125+
echo "AWS_ACCOUNT_ID or AWS_REGION env vars not found"
126+
exit 1
127+
fi
137128
registry_login
138129

139-
local image=$1
140-
local tag=$2
130+
tag=$CORTEX_VERSION
131+
if [ "$include_arm64_arch" = "true" ]; then
132+
blue_echo "Building and pushing $image:$tag (amd64 and arm64)..."
133+
else
134+
blue_echo "Building and pushing $image:$tag (amd64)..."
135+
fi
141136

142-
blue_echo "Pushing $image:$tag..."
143-
docker push $registry_push_url/cortexlabs/$image:$tag
144-
green_echo "Pushed $image:$tag\n"
145-
}
137+
platforms="linux/amd64"
138+
if [ "$include_arm64_arch" = "true" ]; then
139+
platforms+=",linux/arm64"
140+
fi
146141

147-
function build_and_push() {
148-
local image=$1
142+
docker buildx build $ROOT -f $dir/Dockerfile -t $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/cortexlabs/$image:$tag --platform $platforms --push
149143

150-
set -euo pipefail # necessary since this is called in a new shell by parallel
144+
if [ "$include_arm64_arch" = "true" ]; then
145+
green_echo "Built and pushed $image:$tag (amd64 and arm64)..."
146+
else
147+
green_echo "Built and pushed $image:$tag (amd64)..."
148+
fi
151149

152-
tag=$CORTEX_VERSION
153-
build $image $tag
154-
push $image $tag
150+
if [[ " $images_that_can_run_locally " =~ " $image " ]] && [[ "$include_arm64_arch" == "false" ]]; then
151+
blue_echo "Exporting $image:$tag to local docker..."
152+
docker buildx build $ROOT -f $dir/Dockerfile -t cortexlabs/$image:$tag -t $AWS_ACCOUNT_ID.dkr.ecr.$AWS_REGION.amazonaws.com/cortexlabs/$image:$tag --platform $platforms --load
153+
green_echo "Exported $image:$tag to local docker..."
154+
fi
155155
}
156156

157157
function cleanup_local() {
158158
echo "cleaning local repositories..."
159159
docker container prune -f
160160
docker image prune -f
161+
docker buildx prune -f
161162
}
162163

163164
function cleanup_ecr() {
@@ -195,14 +196,6 @@ function validate_env() {
195196
fi
196197
}
197198

198-
# export functions for parallel command
199-
export -f build_and_push
200-
export -f push
201-
export -f build
202-
export -f blue_echo
203-
export -f green_echo
204-
export -f registry_login
205-
206199
# validate environment is correctly set on env.sh
207200
validate_env
208201

@@ -218,13 +211,9 @@ elif [ "$cmd" = "create" ]; then
218211
# usage: registry.sh update-single IMAGE
219212
elif [ "$cmd" = "update-single" ]; then
220213
image=$sub_cmd
221-
if [[ " $images_with_builders " =~ " $image " ]]; then
222-
cache_builder $image
223-
fi
224-
build_and_push $image
214+
build_and_push $image $include_arm64_arch
225215

226216
# usage: registry.sh update all|dev|api
227-
# if parallel utility is installed, the docker build commands will be parallelized
228217
elif [ "$cmd" = "update" ]; then
229218
images_to_build=()
230219

@@ -236,20 +225,12 @@ elif [ "$cmd" = "update" ]; then
236225
images_to_build+=( "${dev_images[@]}" )
237226
fi
238227

239-
for image in $images_with_builders; do
240-
if [[ " ${images_to_build[@]} " =~ " $image " ]]; then
241-
cache_builder $image
242-
fi
228+
for image in "${images_to_build[@]}"; do
229+
build_and_push $image $include_arm64_arch
243230
done
244231

245-
if command -v parallel &> /dev/null && [ -n "${NUM_BUILD_PROCS+set}" ] && [ "$NUM_BUILD_PROCS" != "1" ]; then
246-
is_registry_logged_in=$is_registry_logged_in ROOT=$ROOT registry_push_url=$registry_push_url SHELL=$(type -p /bin/bash) parallel --will-cite --halt now,fail=1 --eta --jobs $NUM_BUILD_PROCS build_and_push "{}" ::: "${images_to_build[@]}"
247-
else
248-
for image in "${images_to_build[@]}"; do
249-
build_and_push $image
250-
done
251-
fi
252-
232+
# usage: registry.sh clean-cache
233+
elif [ "$cmd" = "clean-cache" ]; then
253234
cleanup_local
254235

255236
else

dev/util.sh

+10
Original file line numberDiff line numberDiff line change
@@ -45,3 +45,13 @@ function join_by() {
4545
done
4646
echo
4747
}
48+
49+
# Check if array contains item [$1: item, $2: array name]
50+
function in_array() {
  # $1: item to look for
  # $2: NAME of the array variable to search (not its expanded contents)
  # Returns 0 when an element equals the item exactly, 1 otherwise.
  local needle="$1" item
  # Indirect expansion ("name[@]" + ${!ref}) is used instead of `local -n`,
  # because namerefs require bash >= 4.3.
  local arrref="$2[@]"
  # The ${ref+...} guard makes an empty array expand to nothing instead of
  # tripping `set -u`.
  for item in ${!arrref+"${!arrref}"}; do
    [[ "${item}" == "${needle}" ]] && return 0
  done
  return 1
}

dev/versions.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@
2525
1. If there are new instance types, check if any changes need to be made to `servicequotas.go` or `validateInstanceType()`.
2626

2727
```bash
28-
PREV_RELEASE=1.7.5
29-
NEW_RELEASE=1.7.10
28+
PREV_RELEASE=1.7.10
29+
NEW_RELEASE=1.8.0
3030
wget -q -O cni_supported_instances_prev.txt https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v${PREV_RELEASE}/pkg/awsutils/vpc_ip_resource_limit.go; wget -q -O cni_supported_instances_new.txt https://raw.githubusercontent.com/aws/amazon-vpc-cni-k8s/v${NEW_RELEASE}/pkg/awsutils/vpc_ip_resource_limit.go; git diff --no-index cni_supported_instances_prev.txt cni_supported_instances_new.txt; rm -rf cni_supported_instances_prev.txt; rm -rf cni_supported_instances_new.txt
3131
```
3232

@@ -96,7 +96,7 @@ see https://github.com/moby/moby/issues/39302#issuecomment-639687466_
9696
1. `rm -rf go.mod go.sum && go mod init && go clean -modcache`
9797
1. `go get k8s.io/client-go@<version> && go get k8s.io/apimachinery@<version> && go get k8s.io/api@<version>`
9898
1. `go get istio.io/api@<version> && go get istio.io/client-go@<version>`
99-
1. `go get github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils@v1.7.10`
99+
1. `go get github.com/aws/amazon-vpc-cni-k8s/pkg/awsutils@v1.8.0`
100100
1. `go get github.com/cortexlabs/yaml@581aea36a2e4db10f8696587e48cac5248d64f4d`
101101
1. `go get github.com/cortexlabs/go-input@8b67a7a7b28d1c45f5c588171b3b50148462b247`
102102
1. `echo -e '\nreplace github.com/docker/docker => github.com/docker/engine v19.03.12' >> go.mod`

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ require (
66
cloud.google.com/go v0.73.0 // indirect
77
github.com/DataDog/datadog-go v4.7.0+incompatible
88
github.com/StackExchange/wmi v0.0.0-20190523213315-cbe66965904d // indirect
9-
github.com/aws/amazon-vpc-cni-k8s v1.7.10
9+
github.com/aws/amazon-vpc-cni-k8s v1.8.0
1010
github.com/aws/aws-sdk-go v1.37.23
1111
github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 // indirect
1212
github.com/containerd/containerd v1.4.3 // indirect

0 commit comments

Comments
 (0)