Merge pull request #279827 from markuskowa/fix-cp2k-cuda

cp2k: fix CUDA support
2024-01-09 16:20:42 +01:00 · 2024-01-09 16:20:42 +01:00 · 250bf92b66
commit 250bf92b66
parent db4a9fe8f6 930e7fe5d1
1 changed files with 17 additions and 10 deletions
--- a/pkgs/applications/science/chemistry/cp2k/default.nix
+++ b/pkgs/applications/science/chemistry/cp2k/default.nix
@ -38,13 +38,11 @@
  then "rocm"
  else "none"
 )
-# gpuVersion needs to be set for both CUDA as well as ROCM hardware.
-# gpuArch is only required for the ROCM stack.
 # Change to a value suitable for your target GPU.
 # For AMD values see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2v-rocmhip-support-for-amd-gpu
 # and for Nvidia see https://github.com/cp2k/cp2k/blob/master/INSTALL.md#2i-cuda-optional-improved-performance-on-gpu-systems
-, gpuVersion ? "Mi100"
-, gpuArch ? "gfx908"
+, gpuVersion ? ( if gpuBackend == "cuda" then "A100" else "Mi100" )
+, gpuArch ? ( if gpuBackend == "cuda" then "sm_80" else "gfx908" )
 }:

 assert builtins.elem gpuBackend [ "none" "cuda" "rocm" ];
@ -66,7 +64,9 @@ stdenv.mkDerivation rec {
    fetchSubmodules = true;
  };

-  nativeBuildInputs = [ python3 which openssh makeWrapper pkg-config ];
+  nativeBuildInputs = [ python3 which openssh makeWrapper pkg-config ]
+    ++ lib.optional (gpuBackend == "cuda") cudaPackages.cuda_nvcc;
+
  buildInputs = [
    gfortran
    fftw
@ -88,8 +88,11 @@ stdenv.mkDerivation rec {
    libvdwxc
  ]
  ++ lib.optional enableElpa elpa
-  ++ lib.optional (gpuBackend == "cuda") cudaPackages.cudatoolkit
-  ++ lib.optional (gpuBackend == "rocm") [
+  ++ lib.optionals (gpuBackend == "cuda") [
+    cudaPackages.cuda_cudart
+    cudaPackages.libcublas
+    cudaPackages.cuda_nvrtc
+  ] ++ lib.optionals (gpuBackend == "rocm") [
    rocmPackages.clr
    rocmPackages.rocm-core
    rocmPackages.hipblas
@ -126,7 +129,7 @@ stdenv.mkDerivation rec {
    AR         = ar -r
    ${lib.strings.optionalString (gpuBackend == "cuda") ''
    OFFLOAD_CC = nvcc
-    OFFLOAD_FLAGS = -O3 -g -w --std=c++11
+    OFFLOAD_FLAGS = -O3 -g -w --std=c++11 -arch ${gpuArch}
    OFFLOAD_TARGET = cuda
    GPUVER = ${gpuVersion}
    CXX = mpicxx
@ -144,7 +147,7 @@ stdenv.mkDerivation rec {
                 -D__MPI_VERSION=3 -D__F2008 -D__LIBXSMM -D__SPGLIB \
                 -D__MAX_CONTR=4 -D__LIBVORI ${lib.optionalString enableElpa "-D__ELPA"} \
                 -D__PLUMED2 -D__HDF5 -D__GSL -D__SIRIUS -D__LIBVDWXC -D__SPFFT -D__SPLA \
-                 ${lib.strings.optionalString (gpuBackend == "cuda") "-D__OFFLOAD_CUDA -D__DBCSR_ACC"} \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") "-D__OFFLOAD_CUDA -D__ACC -D__DBCSR_ACC -D__NO_OFFLOAD_PW"} \
                 ${lib.strings.optionalString (gpuBackend == "rocm") "-D__OFFLOAD_HIP -D__DBCSR_ACC -D__NO_OFFLOAD_PW"}
    CFLAGS    = -fopenmp -I${lib.getDev hdf5-fortran}/include -I${lib.getDev gsl}/include
    FCFLAGS    = \$(DFLAGS) -O2 -ffree-form -ffree-line-length-none \
@ -163,7 +166,11 @@ stdenv.mkDerivation rec {
                 -fopenmp ${lib.optionalString enableElpa "$(pkg-config --libs elpa)"} \
                 -lz -ldl ${lib.optionalString (mpi.pname == "openmpi") "$(mpicxx --showme:link)"} \
                 -lplumed -lhdf5_fortran -lhdf5_hl -lhdf5 -lgsl -lsirius -lspla -lspfft -lvdwxc \
-                 ${lib.strings.optionalString (gpuBackend == "cuda") "-lcudart -lnvrtc -lcuda -lcublas"} \
+                 ${lib.strings.optionalString (gpuBackend == "cuda") ''
+                   -L${cudaPackages.cuda_cudart}/lib/stubs/ \
+                   -lcudart -lnvrtc -lcuda -lcublas
+                   ''
+                 } \
                 ${lib.strings.optionalString (gpuBackend == "rocm") "-lamdhip64 -lhipfft -lhipblas -lrocblas"}
    LDFLAGS    = \$(FCFLAGS) \$(LIBS)
    include ${plumed}/lib/plumed/src/lib/Plumed.inc