include/votca/xtp/cudapipeline.h - votca-xtp (upstream/2021)

Tree @upstream/2021 (Download .tar.gz)

cudapipeline.h @upstream/2021 — raw · history · blame

/*
 *            Copyright 2009-2020 The VOTCA Development Team
 *                       (http://www.votca.org)
 *
 *      Licensed under the Apache License, Version 2.0 (the "License")
 *
 * You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *              http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

#ifndef VOTCA_XTP_CUDAPIPELINE_H
#define VOTCA_XTP_CUDAPIPELINE_H

// CMake generated file
#include "votca_xtp_config.h"
#ifndef USE_CUDA
#error Cuda not enabled
#endif

// Local VOTCA includes
#include "cudamatrix.h"

/*
 * \brief Perform Tensor-matrix multiplications in a GPU
 *
 * The `CudaPipeline` class handles the allocation and deallocation of arrays on
 * the GPU.
 */

namespace votca {
namespace xtp {

/* \brief The CudaPipeline class offload Eigen operations to an *Nvidia* GPU
 * using the CUDA language. The Cublas handle is the context manager for all the
 * resources needed by Cublas. While a stream is a queue of sequential
 * operations executed in the Nvidia device.
 */
class CudaPipeline {
 public:
  CudaPipeline(int deviceID) : _deviceID{deviceID} {
    checkCuda(cudaSetDevice(deviceID));
    cublasCreate(&_handle);
    cudaStreamCreate(&_stream);
  }

  ~CudaPipeline();
  CudaPipeline() = delete;
  CudaPipeline(const CudaPipeline &) = delete;
  CudaPipeline &operator=(const CudaPipeline &) = delete;

  // Invoke the ?gemm function of cublas
  void gemm(const CudaMatrix &A, const CudaMatrix &B, CudaMatrix &C,
            bool transpose_A = false, bool transpose_B = false,
            double beta = 0.0) const;

  // Invoke the multiplication with a diagonal matrix of cublas, diagonal matrix
  // B must have 1 column
  void diag_gemm(const CudaMatrix &A, const CudaMatrix &b, CudaMatrix &C) const;

  const cudaStream_t &get_stream() const { return _stream; };

  int getDeviceId() const { return _deviceID; }

 private:
  int _deviceID = 0;
  // The cublas handles allocates hardware resources on the host and device.
  cublasHandle_t _handle;

  // Asynchronous stream
  cudaStream_t _stream;
};

}  // namespace xtp
}  // namespace votca

#endif  // VOTCA_XTP_CUDAPIPELINE_H