diff --git a/.azure-pipelines/integration-test-rocm.yml b/.azure-pipelines/integration-test-rocm.yml index 3315dfa9..5a6de5ab 100644 --- a/.azure-pipelines/integration-test-rocm.yml +++ b/.azure-pipelines/integration-test-rocm.yml @@ -29,7 +29,7 @@ jobs: targetType: 'inline' script: | mkdir build && cd build - CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON .. + CXX=/opt/rocm/bin/hipcc cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/integration-test.yml b/.azure-pipelines/integration-test.yml index ff86a3e3..58181853 100644 --- a/.azure-pipelines/integration-test.yml +++ b/.azure-pipelines/integration-test.yml @@ -31,7 +31,7 @@ jobs: targetType: 'inline' script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.azure-pipelines/multi-nodes-test.yml b/.azure-pipelines/multi-nodes-test.yml index bcf51756..a8319212 100644 --- a/.azure-pipelines/multi-nodes-test.yml +++ b/.azure-pipelines/multi-nodes-test.yml @@ -26,7 +26,7 @@ jobs: targetType: 'inline' script: | mkdir build && cd build - cmake -DCMAKE_BUILD_TYPE=Release -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON .. + cmake -DCMAKE_BUILD_TYPE=Release -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. make -j workingDirectory: '$(System.DefaultWorkingDirectory)' diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index 73496445..6d07896c 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -46,7 +46,7 @@ jobs: - name: Build run: | rm -rf build && mkdir build && cd build - cmake -DBYPASS_GPU_CHECK=ON -DUSE_CUDA=ON .. 
+ cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_CUDA=ON .. make -j - name: Perform CodeQL Analysis @@ -91,7 +91,7 @@ jobs: - name: Build run: | rm -rf build && mkdir build && cd build - CXX=/opt/rocm/bin/hipcc cmake -DBYPASS_GPU_CHECK=ON -DUSE_ROCM=ON .. + CXX=/opt/rocm/bin/hipcc cmake -DMSCCLPP_BYPASS_GPU_CHECK=ON -DMSCCLPP_USE_ROCM=ON .. make -j - name: Perform CodeQL Analysis diff --git a/CMakeLists.txt b/CMakeLists.txt index 6405511b..a573099d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,19 +14,19 @@ enable_language(CXX) list(APPEND CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake) # Options -option(ENABLE_TRACE "Enable tracing" OFF) -option(BUILD_TESTS "Build tests" ON) -option(BUILD_PYTHON_BINDINGS "Build Python bindings" ON) -option(BUILD_APPS_NCCL "Build NCCL interfaces" ON) -option(USE_CUDA "Use NVIDIA/CUDA." OFF) -option(USE_ROCM "Use AMD/ROCm." OFF) -option(BYPASS_GPU_CHECK "Bypass GPU check." OFF) - -if(BYPASS_GPU_CHECK) - if(USE_CUDA) +option(MSCCLPP_ENABLE_TRACE "Enable tracing" OFF) +option(MSCCLPP_BUILD_TESTS "Build tests" ON) +option(MSCCLPP_BUILD_PYTHON_BINDINGS "Build Python bindings" ON) +option(MSCCLPP_BUILD_APPS_NCCL "Build NCCL interfaces" ON) +option(MSCCLPP_USE_CUDA "Use NVIDIA/CUDA." OFF) +option(MSCCLPP_USE_ROCM "Use AMD/ROCm." OFF) +option(MSCCLPP_BYPASS_GPU_CHECK "Bypass GPU check." 
OFF) + +if(MSCCLPP_BYPASS_GPU_CHECK) + if(MSCCLPP_USE_CUDA) message("Bypassing GPU check: using NVIDIA/CUDA.") find_package(CUDAToolkit REQUIRED) - elseif(USE_ROCM) + elseif(MSCCLPP_USE_ROCM) message("Bypassing GPU check: using AMD/ROCm.") # Temporal fix for rocm5.6 set(CMAKE_PREFIX_PATH "/opt/rocm;${CMAKE_PREFIX_PATH}") @@ -40,16 +40,16 @@ else() include(CheckAmdGpu) if(NVIDIA_FOUND AND AMD_FOUND) message("Detected NVIDIA/CUDA and AMD/ROCm: prioritizing NVIDIA/CUDA.") - set(USE_CUDA ON) - set(USE_ROCM OFF) + set(MSCCLPP_USE_CUDA ON) + set(MSCCLPP_USE_ROCM OFF) elseif(NVIDIA_FOUND) message("Detected NVIDIA/CUDA.") - set(USE_CUDA ON) - set(USE_ROCM OFF) + set(MSCCLPP_USE_CUDA ON) + set(MSCCLPP_USE_ROCM OFF) elseif(AMD_FOUND) message("Detected AMD/ROCm.") - set(USE_CUDA OFF) - set(USE_ROCM ON) + set(MSCCLPP_USE_CUDA OFF) + set(MSCCLPP_USE_ROCM ON) else() message(FATAL_ERROR "Neither NVIDIA/CUDA nor AMD/ROCm is found.") endif() @@ -58,7 +58,7 @@ endif() # Declare project set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra") -if(USE_CUDA) +if(MSCCLPP_USE_CUDA) set(CMAKE_CUDA_STANDARD 17) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wextra") project(mscclpp LANGUAGES CXX CUDA) @@ -115,13 +115,13 @@ if(IBVERBS_FOUND) target_compile_definitions(mscclpp_obj PUBLIC USE_IBVERBS) endif() set_target_properties(mscclpp_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION}) -if(USE_CUDA) - target_compile_definitions(mscclpp_obj PRIVATE USE_CUDA) -elseif(USE_ROCM) - target_compile_definitions(mscclpp_obj PRIVATE USE_ROCM) +if(MSCCLPP_USE_CUDA) + target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_CUDA) +elseif(MSCCLPP_USE_ROCM) + target_compile_definitions(mscclpp_obj PRIVATE MSCCLPP_USE_ROCM) endif() -if(ENABLE_TRACE) - target_compile_definitions(mscclpp_obj PRIVATE ENABLE_TRACE) +if(MSCCLPP_ENABLE_TRACE) + target_compile_definitions(mscclpp_obj PRIVATE 
MSCCLPP_ENABLE_TRACE) endif() if(NPKIT_FLAGS) target_compile_definitions(mscclpp_obj PRIVATE ${NPKIT_FLAGS}) @@ -150,17 +150,17 @@ install(TARGETS mscclpp_static ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib) # Tests -if(BUILD_TESTS) +if(MSCCLPP_BUILD_TESTS) enable_testing() # Called here to allow ctest from the build directory add_subdirectory(test) endif() # Python bindings -if(BUILD_PYTHON_BINDINGS) +if(MSCCLPP_BUILD_PYTHON_BINDINGS) add_subdirectory(python) endif() # NCCL interfaces -if(BUILD_APPS_NCCL) +if(MSCCLPP_BUILD_APPS_NCCL) add_subdirectory(apps/nccl) endif() diff --git a/apps/nccl/CMakeLists.txt b/apps/nccl/CMakeLists.txt index 189a9759..7d8804bd 100644 --- a/apps/nccl/CMakeLists.txt +++ b/apps/nccl/CMakeLists.txt @@ -4,7 +4,7 @@ file(GLOB_RECURSE SOURCES CONFIGURE_DEPENDS src/*) file(GLOB_RECURSE HEADERS CONFIGURE_DEPENDS include/nccl.h) -if(USE_ROCM) +if(MSCCLPP_USE_ROCM) set_source_files_properties(${SOURCES} PROPERTIES LANGUAGE CXX) endif() @@ -14,10 +14,10 @@ target_sources(mscclpp_nccl_obj PUBLIC FILE_SET HEADERS FILES ${HEADERS}) target_include_directories(mscclpp_nccl_obj PRIVATE include ${PROJECT_SOURCE_DIR}/src/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS}) target_link_libraries(mscclpp_nccl_obj PRIVATE ${GPU_LIBRARIES} PUBLIC mscclpp_obj) set_target_properties(mscclpp_nccl_obj PROPERTIES LINKER_LANGUAGE CXX POSITION_INDEPENDENT_CODE 1 VERSION ${MSCCLPP_VERSION} SOVERSION ${MSCCLPP_SOVERSION}) -if(USE_CUDA) - target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_CUDA) -elseif(USE_ROCM) - target_compile_definitions(mscclpp_nccl_obj PRIVATE USE_ROCM) +if(MSCCLPP_USE_CUDA) + target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_CUDA) +elseif(MSCCLPP_USE_ROCM) + target_compile_definitions(mscclpp_nccl_obj PRIVATE MSCCLPP_USE_ROCM) endif() add_library(mscclpp_nccl SHARED) @@ -34,6 +34,6 @@ install(TARGETS mscclpp_nccl install(TARGETS mscclpp_nccl_static ARCHIVE DESTINATION ${INSTALL_PREFIX}/lib) -if(BUILD_TESTS) +if(MSCCLPP_BUILD_TESTS) 
add_subdirectory(test) endif() diff --git a/docs/getting-started/quickstart.md b/docs/getting-started/quickstart.md index 9eff7e0e..1155934a 100644 --- a/docs/getting-started/quickstart.md +++ b/docs/getting-started/quickstart.md @@ -13,7 +13,17 @@ * AMD MI250X GPUs + ROCm >= 5.7 * AMD MI300X GPUs + ROCm >= 6.0 * OS: tested over Ubuntu 18.04 and 20.04 -* Libraries: [libnuma](https://github.com/numactl/numactl), MPI (optional) +* Libraries + * [libnuma](https://github.com/numactl/numactl) + ```bash + sudo apt-get install libnuma-dev + ``` + * (Optional, for [building the Python module](#install-from-source-python-module)) Python >= 3.8 and Python Development Package + ```bash + sudo apt-get satisfy "python3 (>=3.8), python3-dev (>=3.8)" + ``` + If you don't want to build the Python module, you need to set `-DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF` in your `cmake` command (see details in [Install from Source (Libraries and Headers)](#install-from-source-libraries-and-headers)). + * (Optional, for benchmarks) MPI * Others * For NVIDIA platforms, `nvidia_peermem` driver should be loaded on all nodes. Check it via: ``` @@ -60,11 +70,12 @@ $ CXX=/path/to/hipcc cmake -DCMAKE_BUILD_TYPE=Release .. $ make -j ``` +(install-from-source-libraries-and-headers)= ## Install from Source (Libraries and Headers) ```bash # Install the generated headers and binaries to /usr/local/mscclpp -$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DBUILD_PYTHON_BINDINGS=OFF .. +$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/mscclpp -DMSCCLPP_BUILD_PYTHON_BINDINGS=OFF .. 
$ make -j mscclpp mscclpp_static $ sudo make install/fast ``` diff --git a/pyproject.toml b/pyproject.toml index f65a2e14..99fcb4c1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,8 +16,8 @@ wheel.packages = ["python/mscclpp", "python/mscclpp_benchmark"] wheel.install-dir = "mscclpp" [tool.scikit-build.cmake.define] -BUILD_PYTHON_BINDINGS = "ON" -BUILD_TESTS = "OFF" +MSCCLPP_BUILD_PYTHON_BINDINGS = "ON" +MSCCLPP_BUILD_TESTS = "OFF" [tool.black] line-length = 120 diff --git a/src/bootstrap/socket.cc b/src/bootstrap/socket.cc index 6377bc6d..b9216e78 100644 --- a/src/bootstrap/socket.cc +++ b/src/bootstrap/socket.cc @@ -78,7 +78,7 @@ static int envSocketFamily(void) { static int findInterfaces(const char* prefixList, char* names, union SocketAddress* addrs, int sock_family, int maxIfNameSize, int maxIfs) { -#ifdef ENABLE_TRACE +#ifdef MSCCLPP_ENABLE_TRACE char line[SOCKET_NAME_MAXLEN + 1]; #endif struct mscclpp::netIf userIfs[MAX_IFS]; @@ -184,7 +184,7 @@ static bool matchSubnet(struct ifaddrs local_if, union SocketAddress* remote) { int FindInterfaceMatchSubnet(char* ifNames, union SocketAddress* localAddrs, union SocketAddress* remoteAddr, int ifNameMaxSize, int maxIfs) { -#ifdef ENABLE_TRACE +#ifdef MSCCLPP_ENABLE_TRACE char line[SOCKET_NAME_MAXLEN + 1]; #endif char line_a[SOCKET_NAME_MAXLEN + 1]; @@ -436,7 +436,7 @@ void Socket::bind() { void Socket::bindAndListen() { bind(); -#ifdef ENABLE_TRACE +#ifdef MSCCLPP_ENABLE_TRACE char line[SOCKET_NAME_MAXLEN + 1]; TRACE(MSCCLPP_INIT | MSCCLPP_NET, "Listening on socket %s", SocketToString(&addr_, line)); #endif @@ -452,7 +452,7 @@ void Socket::bindAndListen() { void Socket::connect(int64_t timeout) { mscclpp::Timer timer; -#ifdef ENABLE_TRACE +#ifdef MSCCLPP_ENABLE_TRACE char line[SOCKET_NAME_MAXLEN + 1]; #endif const int one = 1; diff --git a/src/include/atomic.hpp b/src/include/atomic.hpp index d7f61fec..5c7b7b79 100644 --- a/src/include/atomic.hpp +++ b/src/include/atomic.hpp @@ -4,14 +4,14 @@ #ifndef 
MSCCLPP_ATOMIC_HPP_ #define MSCCLPP_ATOMIC_HPP_ -#if defined(USE_CUDA) +#if defined(MSCCLPP_USE_CUDA) #define MSCCLPP_DEVICE_CUDA #include #undef MSCCLPP_DEVICE_CUDA -#else // !defined(USE_CUDA) +#else // !defined(MSCCLPP_USE_CUDA) #define MSCCLPP_DEVICE_HIP #include #undef MSCCLPP_DEVICE_HIP -#endif // !defined(USE_CUDA) +#endif // !defined(MSCCLPP_USE_CUDA) #endif // MSCCLPP_ATOMIC_HPP_ diff --git a/src/include/debug.h b/src/include/debug.h index 1abbad34..713371b6 100644 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -107,7 +107,7 @@ extern char mscclppLastError[]; #define INFO(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_INFO, (FLAGS), __func__, __LINE__, __VA_ARGS__) #define TRACE_CALL(...) mscclppDebugLog(MSCCLPP_LOG_TRACE, MSCCLPP_CALL, __func__, __LINE__, __VA_ARGS__) -#ifdef ENABLE_TRACE +#ifdef MSCCLPP_ENABLE_TRACE #define TRACE(FLAGS, ...) mscclppDebugLog(MSCCLPP_LOG_TRACE, (FLAGS), __func__, __LINE__, __VA_ARGS__) extern std::chrono::steady_clock::time_point mscclppEpoch; #else diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 96a1ca54..a82d1db9 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -11,7 +11,7 @@ set(TEST_LIBS_GTEST GTest::gtest_main GTest::gmock_main) set(TEST_INC_COMMON PRIVATE ${PROJECT_SOURCE_DIR}/include SYSTEM PRIVATE ${GPU_INCLUDE_DIRS}) set(TEST_INC_INTERNAL PRIVATE ${PROJECT_SOURCE_DIR}/src/include) -if(USE_ROCM) +if(MSCCLPP_USE_ROCM) file(GLOB_RECURSE CU_SOURCES CONFIGURE_DEPENDS *.cu) set_source_files_properties(${CU_SOURCES} PROPERTIES LANGUAGE CXX) endif() diff --git a/test/mscclpp-test/CMakeLists.txt b/test/mscclpp-test/CMakeLists.txt index e2ec8c2e..aa117cb2 100644 --- a/test/mscclpp-test/CMakeLists.txt +++ b/test/mscclpp-test/CMakeLists.txt @@ -5,7 +5,7 @@ FetchContent_Declare(json URL https://github.com/nlohmann/json/releases/download FetchContent_MakeAvailable(json) function(add_mscclpp_test_executable name sources) - if(USE_ROCM) + if(MSCCLPP_USE_ROCM) set_source_files_properties(${sources} 
PROPERTIES LANGUAGE CXX) endif() add_executable(${name} ${sources} common.cc)