Skip to content

Commit

Permalink
Improve CMake options (#376)
Browse files Browse the repository at this point in the history
* Let all CMake option names start with `MSCCLPP_`
* Explain the `MSCCLPP_BUILD_PYTHON_BINDINGS` option in readme

---------

Co-authored-by: Binyang Li <[email protected]>
  • Loading branch information
chhwang and Binyang2014 authored Nov 22, 2024
1 parent db8e187 commit 2127a3b
Show file tree
Hide file tree
Showing 13 changed files with 63 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .azure-pipelines/integration-test-rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON ..
CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
2 changes: 1 addition & 1 deletion .azure-pipelines/integration-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
2 changes: 1 addition & 1 deletion .azure-pipelines/multi-nodes-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ jobs:
targetType: 'inline'
script: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j
workingDirectory: '$(System.DefaultWorkingDirectory)'

Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/codeql-analysis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ jobs:
- name: Build
run: |
rm -rf build && mkdir build && cd build
cmake -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON ..
cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON ..
make -j
- name: Perform CodeQL Analysis
Expand Down Expand Up @@ -91,7 +91,7 @@ jobs:
- name: Build
run: |
rm -rf build && mkdir build && cd build
CXX=/opt/rocm/bin/hipcc cmake -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON ..
CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON ..
make -j
- name: Perform CodeQL Analysis
Expand Down
54 changes: 27 additions & 27 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ enable_language(CXX)
list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake)

# Options
option(ENABLE_TRACE "Enable tracing" OFF)
option(BUILD_TESTS "Build tests" ON)
option(BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
option(BUILD_APPS_NCCL "Build NCCL interfaces" ON)
option(USE_CUDA "Use NVIDIA/CUDA." OFF)
option(USE_ROCM "Use AMD/ROCm." OFF)
option(BYPASS_GPU_CHECK "Bypass GPU check." OFF)

if(BYPASS_GPU_CHECK)
if(USE_CUDA)
option(MSCCLPP_ENABLE_TRACE "Enable tracing" OFF)
option(MSCCLPP_BUILD_TESTS "Build tests" ON)
option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON)
option(MSCCLPP_BUILD_APPS_NCCL "Build NCCL interfaces" ON)
option(MSCCLPP_USE_CUDA "Use NVIDIA/CUDA." OFF)
option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF)
option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." OFF)

if(MSCCLPP_BYPASS_GPU_CHECK)
if(MSCCLPP_USE_CUDA)
message("Bypassing GPU check: using NVIDIA/CUDA.")
find_package(CUDAToolkit REQUIRED)
elseif(USE_ROCM)
elseif(MSCCLPP_USE_ROCM)
message("Bypassing GPU check: using AMD/ROCm.")
# Temporary fix for ROCm 5.6
set(CMAKE_PREFIX_PATH "/opt/rocm;${CMAKE_PREFIX_PATH}")
Expand All @@ -40,16 +40,16 @@ else()
include(CheckAmdGpu)
if(NVIDIA_FOUND AND AMD_FOUND)
message("Detected NVIDIA/CUDA and AMD/ROCm: prioritizing NVIDIA/CUDA.")
set(USE_CUDA ON)
set(USE_ROCM OFF)
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_USE_ROCM OFF)
elseif(NVIDIA_FOUND)
message("Detected NVIDIA/CUDA.")
set(USE_CUDA ON)
set(USE_ROCM OFF)
set(MSCCLPP_USE_CUDA ON)
set(MSCCLPP_USE_ROCM OFF)
elseif(AMD_FOUND)
message("Detected AMD/ROCm.")
set(USE_CUDA OFF)
set(USE_ROCM ON)
set(MSCCLPP_USE_CUDA OFF)
set(MSCCLPP_USE_ROCM ON)
else()
message(FATAL_ERROR "Neither NVIDIA/CUDA nor AMD/ROCm is found.")
endif()
Expand All @@ -58,7 +58,7 @@ endif()
# Declare project
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
if(USE_CUDA)
if(MSCCLPP_USE_CUDA)
set(CMAKE_CUDA_STANDARD 17)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wextra")
project(mscclpp LANGUAGES CXX CUDA)
Expand Down Expand Up @@ -115,13 +115,13 @@ if(IBVERBS_FOUND)
target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS)
endif()
set_target_properties(mscclpp_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION})
if(USE_CUDA)
target_compile_definitions(mscclpp_obj PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_compile_definitions(mscclpp_obj PRIVATE USE_ROCM)
if(MSCCLPP_USE_CUDA)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_CUDA)
elseif(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_ROCM)
endif()
if(ENABLE_TRACE)
target_compile_definitions(mscclpp_obj PRIVATE ENABLE_TRACE)
if(MSCCLPP_ENABLE_TRACE)
target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_ENABLE_TRACE)
endif()
if(NPKIT_FLAGS)
target_compile_definitions(mscclpp_obj PRIVATE ${NPKIT_FLAGS})
Expand Down Expand Up @@ -150,17 +150,17 @@ install(TARGETS mscclpp_static
ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib)

# Tests
if(BUILD_TESTS)
if(MSCCLPP_BUILD_TESTS)
enable_testing() # Called here to allow ctest from the build directory
add_subdirectory(test)
endif()

# Python bindings
if(BUILD_PYTHON_BINDINGS)
if(MSCCLPP_BUILD_PYTHON_BINDINGS)
add_subdirectory(python)
endif()

# NCCL interfaces
if(BUILD_APPS_NCCL)
if(MSCCLPP_BUILD_APPS_NCCL)
add_subdirectory(apps/nccl)
endif()
12 changes: 6 additions & 6 deletions apps/nccl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS src/*)
file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS include/nccl.h)

if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX)
endif()

Expand All @@ -14,10 +14,10 @@ target_sources(mscclpp_nccl_obj PUBLIC FILE_SET HEADERS FILES ${HEADERS})
target_include_directories(mscclpp_nccl_obj PRIVATE include ${PROJECT_SOURCE_DIR}/src/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
target_link_libraries(mscclpp_nccl_obj PRIVATE ${GPU_LIBRARIES} PUBLIC mscclpp_obj)
set_target_properties(mscclpp_nccl_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION})
if(USE_CUDA)
target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_CUDA)
elseif(USE_ROCM)
target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_ROCM)
if(MSCCLPP_USE_CUDA)
target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_CUDA)
elseif(MSCCLPP_USE_ROCM)
target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_ROCM)
endif()

add_library(mscclpp_nccl SHARED)
Expand All @@ -34,6 +34,6 @@ install(TARGETS mscclpp_nccl
install(TARGETS mscclpp_nccl_static
ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib)

if(BUILD_TESTS)
if(MSCCLPP_BUILD_TESTS)
add_subdirectory(test)
endif()
15 changes: 13 additions & 2 deletions docs/getting-started/quickstart.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,17 @@
* AMD MI250X GPUs + ROCm >= 5.7
* AMD MI300X GPUs + ROCm >= 6.0
* OS: tested on Ubuntu 18.04 and 20.04
* Libraries: [libnuma](https://github.com/numactl/numactl), MPI (optional)
* Libraries
* [libnuma](https://github.com/numactl/numactl)
```bash
sudo apt-get install libnuma-dev
```
* (Optional, for [building the Python module](#install-from-source-python-module)) Python >= 3.8 and Python Development Package
```bash
sudo apt-get satisfy "python3 (>=3.8), python3-dev (>=3.8)"
```
If you don't want to build the Python module, set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see details in [Install from Source (Libraries and Headers)](#install-from-source-libraries-and-headers)).
* (Optional, for benchmarks) MPI
* Others
* For NVIDIA platforms, `nvidia_peermem` driver should be loaded on all nodes. Check it via:
```
Expand Down Expand Up @@ -60,11 +70,12 @@ $ CXX=/path/to/hipcc cmake -DCMAKE_BUILD_TYPE=Release ..
$ make -j
```
(install-from-source-libraries-and-headers)=
## Install from Source (Libraries and Headers)
```bash
# Install the generated headers and binaries to /usr/local/mscclpp
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DBUILD_PYTHON_BINDINGS=OFF ..
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF ..
$ make -j mscclpp mscclpp_static
$ sudo make install/fast
```
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ wheel.packages = ["python/mscclpp", "python/mscclpp_benchmark"]
wheel.install-dir = "mscclpp"

[tool.scikit-build.cmake.define]
BUILD_PYTHON_BINDINGS = "ON"
BUILD_TESTS = "OFF"
MSCCLPP_BUILD_PYTHON_BINDINGS = "ON"
MSCCLPP_BUILD_TESTS = "OFF"

[tool.black]
line-length = 120
Expand Down
8 changes: 4 additions & 4 deletions src/bootstrap/socket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ static int envSocketFamily(void) {

static int findInterfaces(const char* prefixList, char* names, union SocketAddress* addrs, int sock_family,
int maxIfNameSize, int maxIfs) {
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
struct mscclpp::netIf userIfs[MAX_IFS];
Expand Down Expand Up @@ -184,7 +184,7 @@ static bool matchSubnet(struct ifaddrs local_if, union SocketAddress* remote) {

int FindInterfaceMatchSubnet(char* ifNames, union SocketAddress* localAddrs, union SocketAddress* remoteAddr,
int ifNameMaxSize, int maxIfs) {
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
char line_a[SOCKET_NAME_MAXLEN + 1];
Expand Down Expand Up @@ -436,7 +436,7 @@ void Socket::bind() {

void Socket::bindAndListen() {
bind();
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
TRACE(MSCCLPP_INIT | MSCCLPP_NET, "Listening on socket %s", SocketToString(&addr_, line));
#endif
Expand All @@ -452,7 +452,7 @@ void Socket::bindAndListen() {

void Socket::connect(int64_t timeout) {
mscclpp::Timer timer;
#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
char line[SOCKET_NAME_MAXLEN + 1];
#endif
const int one = 1;
Expand Down
6 changes: 3 additions & 3 deletions src/include/atomic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
#ifndef MSCCLPP_ATOMIC_HPP_
#define MSCCLPP_ATOMIC_HPP_

#if defined(USE_CUDA)
#if defined(MSCCLPP_USE_CUDA)
#define MSCCLPP_DEVICE_CUDA
#include <mscclpp/atomic_device.hpp>
#undef MSCCLPP_DEVICE_CUDA
#else // !defined(USE_CUDA)
#else // !defined(MSCCLPP_USE_CUDA)
#define MSCCLPP_DEVICE_HIP
#include <mscclpp/atomic_device.hpp>
#undef MSCCLPP_DEVICE_HIP
#endif // !defined(USE_CUDA)
#endif // !defined(MSCCLPP_USE_CUDA)

#endif // MSCCLPP_ATOMIC_HPP_
2 changes: 1 addition & 1 deletion src/include/debug.h
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ extern char mscclppLastError[];
#define INFO(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_INFO, (FLAGS), __func__, __LINE__, __VA_ARGS__)
#define TRACE_CALL(...) mscclppDebugLog(MSCCLPP_LOG_TRACE, MSCCLPP_CALL, __func__, __LINE__, __VA_ARGS__)

#ifdef ENABLE_TRACE
#ifdef MSCCLPP_ENABLE_TRACE
#define TRACE(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_TRACE, (FLAGS), __func__, __LINE__, __VA_ARGS__)
extern std::chrono::steady_clock::time_point mscclppEpoch;
#else
Expand Down
2 changes: 1 addition & 1 deletion test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main)
set(TEST_INC_COMMON PRIVATE ${PROJECT_SOURCE_DIR}/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS})
set(TEST_INC_INTERNAL PRIVATE ${PROJECT_SOURCE_DIR}/src/include)

if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
file(GLOB_RECURSE CU_SOURCES CONFIGURE_DEPENDS *.cu)
set_source_files_properties(${CU_SOURCES} PROPERTIES LANGUAGE CXX)
endif()
Expand Down
2 changes: 1 addition & 1 deletion test/mscclpp-test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download
FetchContent_MakeAvailable(json)

function(add_mscclpp_test_executable name sources)
if(USE_ROCM)
if(MSCCLPP_USE_ROCM)
set_source_files_properties(${sources} PROPERTIES LANGUAGE CXX)
endif()
add_executable(${name} ${sources} common.cc)
Expand Down

0 comments on commit 2127a3b

Please sign in to comment.