Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] added manylinux support #185

Merged
merged 7 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 0 additions & 49 deletions .github/workflows/publish_base_image.yml

This file was deleted.

5 changes: 5 additions & 0 deletions .github/workflows/publish_devel_image.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
name: Publish devel docker image
on:
workflow_dispatch:
env:
# Tells where to store caches.
CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache

jobs:
publish_base:
Expand All @@ -27,6 +30,8 @@ jobs:
context: ./docker
file: ./docker/Dockerfile.devel
push: true
# Action inputs are passed verbatim (no shell expansion), so a bare
# $CI_CACHE_DIR would be used as a literal directory name. Resolve the
# workflow-level env variable through the expression context instead.
cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache
cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache
build-args: |
UBUNTU_VERSION=22.04
CUDA_VERSION=12.1
Expand Down
52 changes: 52 additions & 0 deletions .github/workflows/publish_manylinux_image.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# Builds docker/Dockerfile.manylinux and pushes the resulting image to
# Docker Hub. Triggered manually only (workflow_dispatch).
name: Publish manylinux docker image
on:
  workflow_dispatch:
env:
  # Tells where to store caches.
  CI_CACHE_DIR: ${{ github.workspace }}/../../ci_cache

jobs:
  publish_base:
    runs-on: [self-hosted, linux, release]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Login to Docker Hub
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKER_HUB_USER }}
          password: ${{ secrets.DOCKER_HUB_TOKEN }}

      - name: Build base for cuda 12.1
        uses: docker/build-push-action@v5
        with:
          context: ./docker
          file: ./docker/Dockerfile.manylinux
          push: true
          # Action inputs are passed verbatim (no shell expansion), so the
          # env variable must be resolved through the expression context;
          # a bare $CI_CACHE_DIR would be used as a literal directory name.
          cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache
          cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache
          build-args: |
            CUDA_VERSION=12.1
          tags: |
            vectorchai/scalellm_manylinux:cuda12.1

      # - name: Build base for cuda 11.8
      #   uses: docker/build-push-action@v5
      #   with:
      #     context: ./docker
      #     file: ./docker/Dockerfile.manylinux
      #     push: true
      #     cache-from: type=local,src=${{ env.CI_CACHE_DIR }}/.buildx-cache
      #     cache-to: type=local,dest=${{ env.CI_CACHE_DIR }}/.buildx-cache
      #     build-args: |
      #       CUDA_VERSION=11.8
      #     tags: |
      #       vectorchai/scalellm_manylinux:cuda11.8

9 changes: 5 additions & 4 deletions .github/workflows/release_wheel.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ env:
jobs:
build_wheel:
strategy:
fail-fast: false
matrix:
python: ["3.9", "3.10", "3.11", "3.12"]
cuda: ["11.8", "12.1"]
torch: ["2.1", "2.2", "2.3"]
python: ["3.9", "3.10", "3.11"]
cuda: ["12.1"]
torch: ["2.2", "2.3"]
runs-on: [self-hosted, linux, release]
steps:
- name: Checkout repository
Expand All @@ -36,7 +37,7 @@ jobs:
-e VCPKG_DEFAULT_BINARY_CACHE=/ci_cache/.vcpkg/bincache \
-e CCACHE_DIR=/ci_cache/.ccache \
--user $(id -u):$(id -g) \
vectorchai/scalellm_builder:cuda${{ matrix.cuda }}-ubuntu22.04 \
vectorchai/scalellm_manylinux:cuda${{ matrix.cuda }} \
bash /ScaleLLM/scripts/build_wheel.sh
timeout-minutes: 60

Expand Down
10 changes: 8 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ set_property(GLOBAL PROPERTY USE_FOLDERS ON)

option(USE_CCACHE "Attempt using CCache to wrap the compilation" ON)
option(USE_CXX11_ABI "Use the new C++-11 ABI, which is not backwards compatible." ON)
option(USE_MANYLINUX "Build for manylinux" OFF)

set(CMAKE_POSITION_INDEPENDENT_CODE ON)

Expand Down Expand Up @@ -144,7 +145,6 @@ if(UNIX)
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Og")
endif()

find_package(Boost CONFIG REQUIRED)
find_package(Threads REQUIRED)
# find all dependencies from vcpkg
find_package(fmt CONFIG REQUIRED)
Expand All @@ -162,7 +162,13 @@ find_package(prometheus-cpp CONFIG REQUIRED)
find_package(stduuid CONFIG REQUIRED)
find_package(RapidJSON CONFIG REQUIRED)

find_package(Python REQUIRED COMPONENTS Interpreter Development)
# Choose which Python development component to request, then do a single
# lookup. manylinux doesn't ship Development.Embed, so only
# Development.Module is requested when USE_MANYLINUX is enabled.
if(USE_MANYLINUX)
  set(python_dev_component Development.Module)
else()
  set(python_dev_component Development)
endif()
find_package(Python REQUIRED COMPONENTS Interpreter ${python_dev_component})
unset(python_dev_component)

find_package(NCCL REQUIRED)

if (USE_CXX11_ABI)
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile.base
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# Install gcc
ARG GCC_VERSION=12
ARG GCC_VERSION=11
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
software-properties-common gpg-agent
Expand Down
48 changes: 48 additions & 0 deletions docker/Dockerfile.manylinux
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Build environment based on the PyPA manylinux_2_28 image, extended with
# the project's common tooling, CUDA, ninja, ccache and a rust toolchain.
FROM quay.io/pypa/manylinux_2_28_x86_64 AS base

LABEL maintainer="mi@vectorch.com"
ENV DEBIAN_FRONTEND=noninteractive

ENV LC_ALL=en_US.UTF-8
ENV LANG=en_US.UTF-8
ENV LANGUAGE=en_US.UTF-8

# Install common dependencies
COPY ./common/install_base.sh install_base.sh
RUN bash ./install_base.sh && rm install_base.sh

# Install user
COPY ./common/install_user.sh install_user.sh
RUN bash ./install_user.sh && rm install_user.sh

# Install cuda, cudnn and nccl
ARG CUDA_VERSION=12.1
COPY ./common/install_cuda.sh install_cuda.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh
ENV PATH=/usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

# ARG CMAKE_VERSION=3.18.5
# COPY ./common/install_cmake.sh install_cmake.sh
# RUN if [ -n "${CMAKE_VERSION}" ]; then bash ./install_cmake.sh; fi
# RUN rm install_cmake.sh

# Optional tools: an empty *_VERSION build arg skips the installation.
# The installer script is removed in the same layer that runs it.
ARG NINJA_VERSION=1.11.1
COPY ./common/install_ninja.sh install_ninja.sh
RUN if [ -n "${NINJA_VERSION}" ]; then bash ./install_ninja.sh; fi \
    && rm install_ninja.sh

ARG CCACHE_VERSION=4.8.3
COPY ./common/install_ccache.sh install_ccache.sh
RUN if [ -n "${CCACHE_VERSION}" ]; then bash ./install_ccache.sh; fi \
    && rm install_ccache.sh

# install rust
ENV RUSTUP_HOME=/usr/local/rustup
ENV CARGO_HOME=/usr/local/cargo
ENV PATH=/usr/local/cargo/bin:$PATH
# Restrict the installer download to HTTPS with TLS >= 1.2.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
# give everyone permission to use rust
RUN chmod -R a+w ${RUSTUP_HOME} ${CARGO_HOME}
# Chain with && so the build fails if any of the tools is missing.
RUN rustup --version && cargo --version && rustc --version

CMD ["bash"]
20 changes: 20 additions & 0 deletions docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,31 @@ install_ubuntu() {
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
}

# Install the base tooling with yum; selected by the OS dispatch below when
# /etc/os-release reports ID=almalinux.
install_almalinux() {
  local packages=(
    zip
    wget
    curl
    perl
    sudo
    vim
    jq
    libtool
    unzip
  )

  yum -y update
  yum -y install "${packages[@]}"

  # Cleanup
  yum clean all
}

ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
case "$ID" in
ubuntu)
install_ubuntu
;;
almalinux)
install_almalinux
;;
*)
echo "Unable to determine OS..."
exit 1
Expand Down
16 changes: 16 additions & 0 deletions docker/common/install_ccache.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Downloads a prebuilt ccache release from GitHub and installs the binary to
# /usr/bin/ccache.
#
# Environment:
#   CCACHE_VERSION  release version to install (required), e.g. 4.8.3

set -ex
# Fail the download pipeline if curl or xz fails, not only the last command.
set -o pipefail

if [ -z "$CCACHE_VERSION" ]; then
  echo "CCACHE_VERSION must be set" >&2
  exit 1
fi

ARCH=$(uname -m)
# The archive unpacks into a directory named after the archive itself, so the
# same architecture-specific name is used for the URL and the extracted path.
pkg=ccache-${CCACHE_VERSION}-linux-${ARCH}
url=https://github.com/ccache/ccache/releases/download/v${CCACHE_VERSION}/${pkg}.tar.xz

pushd /tmp
# -f: fail on HTTP errors instead of piping an error page into xz.
curl -fL "$url" | xz -d | tar -x
cp "./${pkg}/ccache" /usr/bin/ccache
# Remove the extracted release directory once the binary is installed.
rm -rf "./${pkg}"
popd

# set max cache size to 5GiB
/usr/bin/ccache -M 5Gi
3 changes: 3 additions & 0 deletions docker/common/install_cmake.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ case "$ID" in
ubuntu)
apt-get remove cmake -y
;;
almalinux)
rm /usr/local/bin/cmake
;;
*)
echo "Unable to determine OS..."
exit 1
Expand Down
Loading
Loading