mirror of
https://github.com/llvm/llvm-project.git
synced 2025-04-17 22:26:39 +00:00
147 lines
5.4 KiB
CMake
147 lines
5.4 KiB
CMake
option(FLANG_EXPERIMENTAL_CUDA_RUNTIME
|
|
"Compile Fortran runtime as CUDA sources (experimental)" OFF
|
|
)
|
|
|
|
option(FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS
|
|
"Do not compile global variables' definitions when producing PTX library" OFF
|
|
)
|
|
|
|
set(FLANG_LIBCUDACXX_PATH "" CACHE PATH "Path to libcu++ package installation")
|
|
|
|
set(FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD "off" CACHE STRING
|
|
"Compile Fortran runtime as OpenMP target offload sources (experimental). Valid options are 'off', 'host_device', 'nohost'")
|
|
|
|
set(FLANG_OMP_DEVICE_ARCHITECTURES "all" CACHE STRING
|
|
"List of OpenMP device architectures to be used to compile the Fortran runtime (e.g. 'gfx1103;sm_90')")
|
|
|
|
macro(enable_cuda_compilation name files)
|
|
if (FLANG_EXPERIMENTAL_CUDA_RUNTIME)
|
|
if (BUILD_SHARED_LIBS)
|
|
message(FATAL_ERROR
|
|
"BUILD_SHARED_LIBS is not supported for CUDA build of Fortran runtime"
|
|
)
|
|
endif()
|
|
|
|
enable_language(CUDA)
|
|
|
|
# TODO: figure out how to make target property CUDA_SEPARABLE_COMPILATION
|
|
# work, and avoid setting CMAKE_CUDA_SEPARABLE_COMPILATION.
|
|
set(CMAKE_CUDA_SEPARABLE_COMPILATION ON)
|
|
|
|
# Treat all supported sources as CUDA files.
|
|
set_source_files_properties(${files} PROPERTIES LANGUAGE CUDA)
|
|
set(CUDA_COMPILE_OPTIONS)
|
|
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "Clang")
|
|
# Allow varargs.
|
|
set(CUDA_COMPILE_OPTIONS
|
|
-Xclang -fcuda-allow-variadic-functions
|
|
)
|
|
endif()
|
|
if ("${CMAKE_CUDA_COMPILER_ID}" MATCHES "NVIDIA")
|
|
set(CUDA_COMPILE_OPTIONS
|
|
--expt-relaxed-constexpr
|
|
# Disable these warnings:
|
|
# 'long double' is treated as 'double' in device code
|
|
-Xcudafe --diag_suppress=20208
|
|
-Xcudafe --display_error_number
|
|
)
|
|
endif()
|
|
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
|
|
"${CUDA_COMPILE_OPTIONS}"
|
|
)
|
|
|
|
if (EXISTS "${FLANG_LIBCUDACXX_PATH}/include")
|
|
# When using libcudacxx headers files, we have to use them
|
|
# for all files of F18 runtime.
|
|
include_directories(AFTER ${FLANG_LIBCUDACXX_PATH}/include)
|
|
add_compile_definitions(RT_USE_LIBCUDACXX=1)
|
|
endif()
|
|
|
|
# Add an OBJECT library consisting of CUDA PTX.
|
|
llvm_add_library(${name}PTX OBJECT PARTIAL_SOURCES_INTENDED ${files})
|
|
set_property(TARGET obj.${name}PTX PROPERTY CUDA_PTX_COMPILATION ON)
|
|
if (FLANG_CUDA_RUNTIME_PTX_WITHOUT_GLOBAL_VARS)
|
|
target_compile_definitions(obj.${name}PTX
|
|
PRIVATE FLANG_RUNTIME_NO_GLOBAL_VAR_DEFS
|
|
)
|
|
endif()
|
|
endif()
|
|
endmacro()
|
|
|
|
macro(enable_omp_offload_compilation files)
|
|
if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "off")
|
|
# 'host_device' build only works with Clang compiler currently.
|
|
# The build is done with the CMAKE_C/CXX_COMPILER, i.e. it does not use
|
|
# the in-tree built Clang. We may have a mode that would use the in-tree
|
|
# built Clang.
|
|
#
|
|
# 'nohost' is supposed to produce an LLVM Bitcode library,
|
|
# and it has to be done with a C/C++ compiler producing LLVM Bitcode
|
|
# compatible with the LLVM toolchain version distributed with the Flang
|
|
# compiler.
|
|
# In general, the in-tree built Clang should be used for 'nohost' build.
|
|
# Note that 'nohost' build does not produce the host version of Flang
|
|
# runtime library, so there will be two separate distributable objects.
|
|
# 'nohost' build is a TODO.
|
|
|
|
if (NOT FLANG_EXPERIMENTAL_OMP_OFFLOAD_BUILD STREQUAL "host_device")
|
|
message(FATAL_ERROR "Unsupported OpenMP offload build of Flang runtime")
|
|
endif()
|
|
if (BUILD_SHARED_LIBS)
|
|
message(FATAL_ERROR
|
|
"BUILD_SHARED_LIBS is not supported for OpenMP offload build of Fortran runtime"
|
|
)
|
|
endif()
|
|
|
|
if ("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang" AND
|
|
"${CMAKE_C_COMPILER_ID}" MATCHES "Clang")
|
|
|
|
set(all_amdgpu_architectures
|
|
"gfx700;gfx701;gfx801;gfx803;gfx900;gfx902;gfx906"
|
|
"gfx908;gfx90a;gfx90c;gfx942;gfx950;gfx1010;gfx1030"
|
|
"gfx1031;gfx1032;gfx1033;gfx1034;gfx1035;gfx1036"
|
|
"gfx1100;gfx1101;gfx1102;gfx1103;gfx1150;gfx1151"
|
|
"gfx1152;gfx1153;gfx1200;gfx1201"
|
|
)
|
|
set(all_nvptx_architectures
|
|
"sm_35;sm_37;sm_50;sm_52;sm_53;sm_60;sm_61;sm_62"
|
|
"sm_70;sm_72;sm_75;sm_80;sm_86;sm_89;sm_90"
|
|
)
|
|
set(all_gpu_architectures
|
|
"${all_amdgpu_architectures};${all_nvptx_architectures}"
|
|
)
|
|
# TODO: support auto detection on the build system.
|
|
if (FLANG_OMP_DEVICE_ARCHITECTURES STREQUAL "all")
|
|
set(FLANG_OMP_DEVICE_ARCHITECTURES ${all_gpu_architectures})
|
|
endif()
|
|
list(REMOVE_DUPLICATES FLANG_OMP_DEVICE_ARCHITECTURES)
|
|
|
|
string(REPLACE ";" "," compile_for_architectures
|
|
"${FLANG_OMP_DEVICE_ARCHITECTURES}"
|
|
)
|
|
|
|
set(OMP_COMPILE_OPTIONS
|
|
-fopenmp
|
|
-fvisibility=hidden
|
|
-fopenmp-cuda-mode
|
|
--offload-arch=${compile_for_architectures}
|
|
# Force LTO for the device part.
|
|
-foffload-lto
|
|
)
|
|
set_source_files_properties(${files} PROPERTIES COMPILE_OPTIONS
|
|
"${OMP_COMPILE_OPTIONS}"
|
|
)
|
|
|
|
# Enable "declare target" in the source code.
|
|
set_source_files_properties(${files}
|
|
PROPERTIES COMPILE_DEFINITIONS OMP_OFFLOAD_BUILD
|
|
)
|
|
else()
|
|
message(FATAL_ERROR
|
|
"Flang runtime build is not supported for these compilers:\n"
|
|
"CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}\n"
|
|
"CMAKE_C_COMPILER_ID: ${CMAKE_C_COMPILER_ID}")
|
|
endif()
|
|
endif()
|
|
endmacro()
|