Skip to content

Commit

Permalink
Adding Spark CI docker
Browse files Browse the repository at this point in the history
  • Loading branch information
CUK_AIDev committed Dec 5, 2024
1 parent 506b44d commit 5e36252
Show file tree
Hide file tree
Showing 2 changed files with 144 additions and 0 deletions.
67 changes: 67 additions & 0 deletions .github/workflows/docker-build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
name: Build and Push Docker Image

on:
  push:
    branches: [ "main" ]
    tags: [ 'v*.*.*' ]
  pull_request:
    branches: [ "main" ]

jobs:
  build:
    runs-on: ubuntu-latest
    permissions:
      contents: read
      packages: write
      security-events: write

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Login is only needed when we actually push; skipping it on PRs also
      # avoids failures on fork PRs, where GITHUB_TOKEN has no packages:write.
      - name: Login to GitHub Container Registry
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # QEMU enables the linux/arm64 half of the multi-arch build below.
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository }}/spark-ci
          tags: |
            type=ref,event=branch
            type=ref,event=pr
            type=semver,pattern={{version}}
            type=semver,pattern={{major}}.{{minor}}
            type=sha

      # NOTE(review): the previous actions/cache step priming /tmp/.buildx-cache
      # was removed — the build below uses the GitHub Actions cache backend
      # (type=gha) directly, so the local-directory cache was never consulted
      # and only consumed cache quota.
      - name: Build and push Docker image
        uses: docker/build-push-action@v6
        with:
          context: .
          file: docker/Spark.CI.Dockerfile
          # PRs get a build-only dry run; pushes/tags publish to ghcr.io.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          platforms: linux/amd64,linux/arm64
          cache-from: type=gha
          cache-to: type=gha,mode=max
          build-args: |
            BUILDKIT_INLINE_CACHE=1
77 changes: 77 additions & 0 deletions docker/Spark.CI.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
FROM ubuntu:24.04

# Pinned toolchain versions for the Spark CI image.
# A bump of PYTHON_VERSION to 3.11.7 was under consideration when this was
# written — keep the pin deliberate, since pyenv compiles it from source.
ARG PYTHON_VERSION=3.9.16
ARG SPARK_VERSION=3.5.0
# NOTE(review): HADOOP_VERSION is currently unused — the sdkman hadoop install
# further down is commented out. Kept so the pinned version is not lost.
ARG HADOOP_VERSION=3.3.5
ARG JAVA_VERSION=11.0.24-zulu

# Build prerequisites for pyenv-compiled CPython (headers for ssl, sqlite,
# readline, lzma, …) plus general tooling (curl, git, zip). The apt list
# cleanup stays in the same layer so the cache never reaches the image.
# ca-certificates is listed explicitly because --no-install-recommends would
# otherwise drop it and break HTTPS for curl.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    libbz2-dev \
    libffi-dev \
    liblzma-dev \
    libncurses5-dev \
    libreadline-dev \
    libsqlite3-dev \
    libssl-dev \
    libxml2-dev \
    libxmlsec1-dev \
    llvm \
    tk-dev \
    unzip \
    wget \
    xz-utils \
    zip \
    zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*

# Install pyenv.
# NOTE(review): curl | bash fetches an unpinned installer — consider pinning a
# pyenv release tag or verifying a checksum for reproducible CI images.
RUN curl https://pyenv.run | bash

# Set up pyenv environment variables (key=value form — the legacy
# space-separated "ENV key value" format is deprecated).
ENV PYENV_ROOT=/root/.pyenv
Check warning on line 36 in docker/Spark.CI.Dockerfile — GitHub Actions / build: LegacyKeyValueFormat: "ENV key=value" should be used instead of the legacy "ENV key value" format. More info: https://docs.docker.com/go/dockerfile/rule/legacy-key-value-format/
# Put pyenv shims first on PATH so `python3` resolves to the pyenv-managed
# interpreter (key=value form; the space-separated ENV format is deprecated).
ENV PATH=$PYENV_ROOT/shims:$PYENV_ROOT/bin:$PATH

Check warning on line 37 in docker/Spark.CI.Dockerfile — GitHub Actions / build: LegacyKeyValueFormat: "ENV key=value" should be used instead of the legacy "ENV key value" format. More info: https://docs.docker.com/go/dockerfile/rule/legacy-key-value-format/

# Compile and activate the pinned Python via pyenv; rehash refreshes the shims
# so the new interpreter is picked up on PATH immediately.
RUN pyenv install ${PYTHON_VERSION} && \
    pyenv global ${PYTHON_VERSION} && \
    pyenv rehash

# Install sdkman (used below for Java and Spark).
# NOTE(review): unpinned curl | bash installer — consider pinning/verifying
# for reproducible CI images.
RUN curl -s "https://get.sdkman.io" | bash

# Install Java and Spark through sdkman. sdkman-init.sh must be sourced in the
# same shell because sdk is a shell function, not a binary.
RUN bash -c "source $HOME/.sdkman/bin/sdkman-init.sh && \
    sdk install java ${JAVA_VERSION} && \
    sdk use java ${JAVA_VERSION} && \
    sdk install spark ${SPARK_VERSION} && \
    sdk use spark ${SPARK_VERSION}"
# Hadoop install intentionally disabled for now (see HADOOP_VERSION ARG above):
# && \
#    sdk install hadoop ${HADOOP_VERSION} && \
#    sdk use hadoop ${HADOOP_VERSION}"

# Runtime environment: sdkman keeps a "current" symlink per candidate.
ENV JAVA_HOME=/root/.sdkman/candidates/java/current
ENV SPARK_HOME=/root/.sdkman/candidates/spark/current
# ENV HADOOP_HOME=/root/.sdkman/candidates/hadoop/current
ENV PATH=$PATH:$JAVA_HOME/bin:$SPARK_HOME/bin:$SPARK_HOME/sbin
# :$HADOOP_HOME/bin

# Bootstrap pipx via the pyenv Python. --no-cache-dir keeps pip's download
# cache out of the image layer.
RUN python3 -m pip install --no-cache-dir --upgrade pip && \
    python3 -m pip install --no-cache-dir --user pipx && \
    python3 -m pipx ensurepath
ENV PATH="/root/.local/bin:$PATH"

# Poetry pinned so CI dependency resolution is reproducible.
RUN pipx install poetry==1.8.4

# Verify installations. A plain && chain is sufficient: RUN already fails the
# build on any nonzero exit, so the previous "|| exit 1" on each command was
# redundant and its mixed &&/|| precedence was misleading.
RUN echo "Verifying installations..." && \
    java -version && \
    python3 --version && \
    spark-submit --version && \
    poetry -V

0 comments on commit 5e36252

Please sign in to comment.