#!/bin/bash
# Cloud-init script: bootstraps Kubeflow on an OKE cluster, secures it with a
# self-signed certificate, and exposes it through an Istio ingress gateway
# backed by an OCI load balancer.
set -o pipefail
LOG_FILE="/var/log/OKE-kubeflow-initialize.log"
log() {
  echo "$(date) [${EXECNAME}]: $*" >> "${LOG_FILE}"
}
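# Read deployment parameters from the OCI Instance Metadata Service (IMDS).
# The "Authorization: Bearer Oracle" header is required by the v2 endpoints
# and is ignored by the v1 endpoints.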
region=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/regionInfo/regionIdentifier)
namespace=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/metadata/namespace)
availability_domain=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v2/instance/metadata/availability_domain)
oke_cluster_id=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/oke_cluster_id)
kubeflow_password=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/kubeflow_password)
mount_target_id=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/mount_target)
kustomize_version=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/kustomize_version)
kubeflow_branch=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/kubeflow_version)
load_balancer_ip=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/load_balancer_ip)
configure_oracle_auth=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/configure_oracle_auth)
issuer=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/oci_domain)
client_id=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/client_id)
client_secret=$(curl -s -H "Authorization: Bearer Oracle" -L http://169.254.169.254/opc/v1/instance/metadata/client_secret)
country=$(echo "$region" | awk -F'-' '{print $1}')
city=$(echo "$region" | awk -F'-' '{print $2}')
# Install kubectl and git from the upstream Kubernetes package repository
EXECNAME="Kubectl & Git"
log "->Install"
cat <<EOF | sudo tee /etc/yum.repos.d/kubernetes.repo
[kubernetes]
name=Kubernetes
baseurl=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/
enabled=1
gpgcheck=0
repo_gpgcheck=0
gpgkey=https://pkgs.k8s.io/core:/stable:/v1.29/rpm/repodata/repomd.xml.key
EOF
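# The repository is pinned to the Kubernetes v1.29 package stream; if the OKE
# cluster runs a different minor version, the baseurl above should be adjusted
# to match.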
yum install kubectl git -y >> $LOG_FILE
# kubectl is installed; set up bash completion and a kubeconfig directory for the opc user
log "->Configure"
mkdir -p /home/opc/.kube
echo "source <(kubectl completion bash)" >> ~/.bashrc
echo "alias k='kubectl'" >> ~/.bashrc
echo "source <(kubectl completion bash)" >> /home/opc/.bashrc
echo "alias k='kubectl'" >> /home/opc/.bashrc
source ~/.bashrc
# Get the OCI CLI installed
EXECNAME="OCI CLI"
log "->Install"
yum install python36-oci-cli -y >> $LOG_FILE
echo "export OCI_CLI_AUTH=instance_principal" >> ~/.bash_profile
echo "export OCI_CLI_AUTH=instance_principal" >> ~/.bashrc
echo "export OCI_CLI_AUTH=instance_principal" >> /home/opc/.bash_profile
echo "export OCI_CLI_AUTH=instance_principal" >> /home/opc/.bashrc
EXECNAME="Kubeconfig"
log "->Generate"
while [ ! -f /root/.kube/config ]; do
  sleep 5
  source ~/.bashrc
  oci ce cluster create-kubeconfig --cluster-id "${oke_cluster_id}" --file /root/.kube/config --region "${region}" --token-version 2.0.0 >> "$LOG_FILE"
done
cp /root/.kube/config /home/opc/.kube/config
chown -R opc:opc /home/opc/.kube/
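# Optional sanity check once the kubeconfig exists, e.g.:
#   kubectl --kubeconfig /home/opc/.kube/config get nodes
# should list the OKE worker nodes.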
EXECNAME="Kustomize"
log "->Fetch & deploy to /bin/"
# Now that we have kubectl configured, let us download kustomize
wget "https://github.com/kubernetes-sigs/kustomize/releases/download/kustomize%2Fv${kustomize_version}/kustomize_v${kustomize_version}_linux_amd64.tar.gz"
tar -xzvf kustomize_v${kustomize_version}_linux_amd64.tar.gz
mv kustomize /bin/kustomize
chmod +x /bin/kustomize
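# kustomize (at the version requested via the kustomize_version metadata value)
# is used below to render the Kubeflow "example" overlay.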
# Download Kubeflow
EXECNAME="Kubeflow"
log "->Clone Repo"
mkdir -p /opt/kubeflow
cd /opt/kubeflow
# Clone only the requested Kubeflow release branch (v${kubeflow_branch})
export kubeflow_branch
git clone -b v$kubeflow_branch https://github.com/kubeflow/manifests.git >> $LOG_FILE
LBIP="$load_balancer_ip"
DOMAIN="kubeflow.$load_balancer_ip.nip.io"
# Create certificates
mkdir -p /opt/kfsecure
cd /opt/kfsecure
cat <<EOF | tee /opt/kfsecure/istio_namespace.yaml
apiVersion: v1
kind: Namespace
metadata:
  name: istio-system
EOF
kubectl --kubeconfig /root/.kube/config apply -f /opt/kfsecure/istio_namespace.yaml
sleep 20
openssl req -x509 -sha256 -days 365 -nodes -newkey rsa:2048 -subj "/CN=${DOMAIN}/C=$country/L=$city" -keyout rootCA.key -out rootCA.crt
cat > csr.conf <<EOF
[ req ]
default_bits = 2048
prompt = no
default_md = sha256
req_extensions = req_ext
distinguished_name = dn
[ dn ]
C = $country
ST = $city
L = $city
O = Kubeflow
OU = Kubeflow
CN = ${DOMAIN}
[ req_ext ]
subjectAltName = @alt_names
[ alt_names ]
DNS.1 = ${DOMAIN}
IP.1 = ${LBIP}
EOF
openssl genrsa -out "${DOMAIN}.key" 2048
openssl req -new -key "${DOMAIN}.key" -out "${DOMAIN}.csr" -config csr.conf
cat > cert.conf <<EOF
authorityKeyIdentifier=keyid,issuer
basicConstraints=CA:FALSE
keyUsage = digitalSignature, nonRepudiation, keyEncipherment, dataEncipherment
subjectAltName = @alt_names
[alt_names]
DNS.1 = ${DOMAIN}
IP.1 = ${LBIP}
EOF
openssl x509 -req -in "${DOMAIN}.csr" -CA rootCA.crt -CAkey rootCA.key -CAcreateserial -out "${DOMAIN}.crt" -days 365 -sha256 -extfile cert.conf
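# At this point rootCA.crt/rootCA.key are a self-signed CA, and ${DOMAIN}.crt/${DOMAIN}.key
# are the server certificate and key that the Istio ingress gateway will present.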
sleep 10
for i in {1..5}; do
  kubectl --kubeconfig /root/.kube/config create secret tls kubeflow-tls-cert --key="${DOMAIN}.key" --cert="${DOMAIN}.crt" -n istio-system | tee -a "$LOG_FILE"
  if kubectl --kubeconfig /root/.kube/config get secret kubeflow-tls-cert -n istio-system >/dev/null 2>&1; then
    echo "kubeflow-tls-cert secret created successfully" | tee -a "$LOG_FILE"
    break
  fi
  echo "Attempt to create kubeflow-tls-cert secret failed. Retrying in 5 seconds..." | tee -a "$LOG_FILE"
  sleep 5
done
cp /opt/kubeflow/manifests/common/dex/base/config-map.yaml /opt/kubeflow/manifests/common/dex/base/config-map.yaml.DEFAULT
# Enable authentication through Oracle IDCS
if [ "$configure_oracle_auth" != false ]; then
# Update the issuer line
sed -i "s|issuer:.*|issuer: https://${DOMAIN}/dex|g" /opt/kubeflow/manifests/common/dex/base/config-map.yaml
# Update the redirectURIs line
sed -i "s|redirectURIs:.*|redirectURIs: [\"/authservice/oidc/callback\",\"https://${DOMAIN}/dex/callback\"]|g" /opt/kubeflow/manifests/common/dex/base/config-map.yaml
# Add Oracle connector
cat <<EOF >> /opt/kubeflow/manifests/common/dex/base/config-map.yaml
    connectors:
    - type: oidc
      id: oracle
      name: Oracle
      config:
        issuer: ${issuer}/
        clientID: ${client_id}
        clientSecret: ${client_secret}
        redirectURI: https://${DOMAIN}/dex/callback
        getUserInfo: true
        userNameKey: user_displayname
        insecureSkipEmailVerified: true
EOF
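# The connector above (indented four spaces so it stays inside the config.yaml block
# scalar of the ConfigMap) makes dex delegate login to the Oracle identity provider at
# ${issuer}, using the confidential application's client ID and secret from metadata.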
###### Update OIDC Provider
sed -i "s|^OIDC_PROVIDER=.*|OIDC_PROVIDER=https://${DOMAIN}/dex|g" /opt/kubeflow/manifests/common/oidc-client/oidc-authservice/base/params.env
# Add CA_BUNDLE to OIDC
sed -i "/^OIDC_PROVIDER=.*/a\CA_BUNDLE=/cert/b64" /opt/kubeflow/manifests/common/oidc-client/oidc-authservice/base/params.env
####### Modify StatefulSet
sed -i "/mountPath: \/var\/lib\/authservice/a\\
- name: ca-cert\\
readOnly: true\\
mountPath: /cert" /opt/kubeflow/manifests/common/oidc-client/oidc-authservice/base/statefulset.yaml
cat <<EOF >> /opt/kubeflow/manifests/common/oidc-client/oidc-authservice/base/statefulset.yaml
        - name: ca-cert
          secret:
            secretName: kubeflow-tls-cert
            items:
            - key: tls.crt
              path: b64
            defaultMode: 511
EOF
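# The appended volume mounts the kubeflow-tls-cert secret at /cert (tls.crt exposed
# as /cert/b64), matching the CA_BUNDLE=/cert/b64 entry added to params.env above, so
# oidc-authservice trusts the self-signed certificate when it reaches dex over HTTPS.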
## Enable Automatic Profiles for Dashboard
sed -i "s/^CD_REGISTRATION_FLOW=false/CD_REGISTRATION_FLOW=true/" /opt/kubeflow/manifests/apps/centraldashboard/upstream/base/params.env
fi
# Change the default Kubeflow Password
export kubeflow_password
pip3 install --upgrade pip
pip3 install passlib
pip3 install bcrypt
hashed_password=$(python3 -c "
import os
from passlib.hash import bcrypt
kubeflow_password = os.getenv('kubeflow_password')
print(bcrypt.using(rounds=12, ident='2y').hash(kubeflow_password))
")
sed -i "s|hash:.*|hash: $hashed_password|" /opt/kubeflow/manifests/common/dex/base/config-map.yaml
if [ "$mount_target_id" != "not_using" ]; then
mkdir -p /opt/kubeflow_fs
cd /opt/kubeflow_fs
cat > existing-fss-st-class.yaml <<EOF
kind: StorageClass
apiVersion: storage.k8s.io/v1
metadata:
  name: existing-fss-storage
provisioner: fss.csi.oraclecloud.com
parameters:
  availabilityDomain: $availability_domain
  mountTargetOcid: $mount_target_id
EOF
sleep 20
kubectl --kubeconfig /root/.kube/config apply -f existing-fss-st-class.yaml
kubectl --kubeconfig /root/.kube/config patch storageclass oci-bv -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"false"}}}'
kubectl --kubeconfig /root/.kube/config patch storageclass existing-fss-storage -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'
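# oci-bv is demoted and the FSS-backed class becomes the cluster default, so PVCs
# created without an explicit storageClassName are provisioned on shared file storage.
# The sed below also skips the read-only .snapshot directory that OCI File Storage
# exposes, which would otherwise make the authservice chmod fail.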
sed -i 's|chmod -R 777 /var/lib/authservice;|find /var/lib/authservice ! -name ".snapshot" -exec chmod 777 {} +;|' /opt/kubeflow/manifests/common/oidc-client/oidc-authservice/overlays/ibm-storage-config/statefulset.yaml
# Install Kubeflow
log "->Install via Kustomize with FS as storage class"
source <(kubectl completion bash)
log "-->Build & Deploy Kubeflow with FS as storage class"
cd /opt/kubeflow/manifests
while ! kustomize build example | kubectl apply --kubeconfig /root/.kube/config -f - | tee -a $LOG_FILE; do echo 'Retrying to apply resources'; sleep 60; done
sleep 120
# If oidc-authservice is stuck in CrashLoopBackOff, apply the FSS storage overlay and recreate the pod
pod_status=$(kubectl --kubeconfig /root/.kube/config get pod "oidc-authservice-0" -n "istio-system" -o jsonpath='{.status.containerStatuses[?(@.state.waiting.reason == "CrashLoopBackOff")]}')
if [ -n "$pod_status" ]; then
echo "$pod_status"
kustomize build common/oidc-client/oidc-authservice/overlays/ibm-storage-config | kubectl apply --kubeconfig /root/.kube/config -f -
kubectl --kubeconfig /root/.kube/config delete pod "oidc-authservice-0" -n "istio-system"
fi
else
# Install Kubeflow
log "->Install via Kustomize"
source <(kubectl completion bash)
log "-->Build & Deploy Kubeflow"
cd /opt/kubeflow/manifests
while ! kustomize build example | kubectl apply --kubeconfig /root/.kube/config -f - | tee -a $LOG_FILE; do echo 'Retrying to apply resources'; sleep 60; done
fi
sleep 30
# Check status for all pods
all_pods_running() {
  # Echo "true" only if every pod across all namespaces is in the Running state.
  # Reading via process substitution keeps the loop in the current shell, so the
  # all_running flag is not lost in a pipeline subshell.
  local all_running=true
  local namespace name ready status
  while read -r namespace name ready status _; do
    if [[ "$status" != "Running" ]]; then
      all_running=false
      break
    fi
  done < <(kubectl --kubeconfig /root/.kube/config get pods --all-namespaces --no-headers)
  echo "$all_running"
}
# Check for Mount Volume error on failed pods
check_mount_volume_errors() {
  local name=$1
  local namespace=$2
  errors=$(kubectl --kubeconfig /root/.kube/config describe pod "$name" -n "$namespace" | grep "MountVolume.SetUp failed")
  echo "$errors"
}
# Reapply the Kubeflow deployment until all pods are running
status="NotRunning"
while [[ "$status" != "true" ]]; do
  echo "Checking pod statuses..."
  pods=$(kubectl --kubeconfig /root/.kube/config get pods --all-namespaces --no-headers)
  # Read the pod list in the current shell (no pipeline subshell) so the status flag
  # and break below take effect; pod_status avoids clobbering the outer $status.
  while read -r namespace name ready pod_status _; do
    if [[ "$pod_status" != "Running" ]]; then
      echo "Pod $name in namespace $namespace is not Running." >> "$LOG_FILE"
      errors=$(check_mount_volume_errors "$name" "$namespace")
      if [[ -n "$errors" ]]; then
        echo "MountVolume.SetUp error found for pod $name. Re-applying resources..." >> "$LOG_FILE"
        while ! kustomize build example | kubectl apply --kubeconfig /root/.kube/config -f - | tee -a "$LOG_FILE"; do
          echo 'Retrying to apply resources...'
          sleep 60
        done
        # Recheck pod statuses after applying resources
        echo "Rechecking pod statuses after applying resources..." >> "$LOG_FILE"
        sleep 20
        status=$(all_pods_running)
        if [[ "$status" == "true" ]]; then
          echo "All pods are now Running after reapplying resources." >> "$LOG_FILE"
          break
        fi
      fi
    fi
  done <<< "$pods"
  # Check if all pods are now Running
  status=$(all_pods_running)
  if [[ "$status" == "true" ]]; then
    echo "All pods are now Running." >> "$LOG_FILE"
    break
  else
    echo "Not all pods are Running. Waiting..."
    sleep 10
  fi
done
cat <<EOF | tee /tmp/patchservice_lb.yaml
spec:
  type: LoadBalancer
  loadBalancerIP: $load_balancer_ip
metadata:
  annotations:
    oci.oraclecloud.com/load-balancer-type: "lb"
    service.beta.kubernetes.io/oci-load-balancer-shape: "flexible"
    service.beta.kubernetes.io/oci-load-balancer-shape-flex-min: "10"
    service.beta.kubernetes.io/oci-load-balancer-shape-flex-max: "10"
EOF
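# The patch turns istio-ingressgateway into an OCI flexible load balancer (10 Mbps
# min/max shape) bound to the pre-allocated IP passed in through instance metadata.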
for i in {1..3}; do
  # Quote the command substitution so the test does not break while the pod is absent
  if [ "$(kubectl --kubeconfig /root/.kube/config get pods -n istio-system --no-headers=true | grep -i ingressgateway | awk '{print $3}')" = "Running" ]; then
    echo "Ingress Gateway has been created successfully"
    break
  fi
  sleep 60
done
kubectl --kubeconfig /root/.kube/config patch svc istio-ingressgateway -n istio-system -p "$(cat /tmp/patchservice_lb.yaml)"
sleep 120
cat <<EOF | tee /opt/kfsecure/sslenableingress.yaml
apiVersion: v1
items:
- apiVersion: networking.istio.io/v1beta1
  kind: Gateway
  metadata:
    annotations:
    name: kubeflow-gateway
    namespace: kubeflow
  spec:
    selector:
      istio: ingressgateway
    servers:
    - hosts:
      - "*"
      port:
        name: https
        number: 443
        protocol: HTTPS
      tls:
        mode: SIMPLE
        credentialName: kubeflow-tls-cert
    - hosts:
      - "*"
      port:
        name: http
        number: 80
        protocol: HTTP
      tls:
        httpsRedirect: true
kind: List
metadata:
  resourceVersion: ""
  selfLink: ""
EOF
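# The Gateway above terminates TLS on port 443 with the kubeflow-tls-cert secret and
# redirects plain HTTP on port 80 to HTTPS.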
kubectl --kubeconfig /root/.kube/config apply -f /opt/kfsecure/sslenableingress.yaml
echo "Load Balancer IP is ${LBIP}" |tee -a $LOG_FILE
echo "Point your browser to https://${DOMAIN}" |tee -a $LOG_FILE