-
Notifications
You must be signed in to change notification settings - Fork 172
Description
Summary
oneapi::mkl::lapack::gesvd return empty result on cuda backend if M<N.
Version
GIT_TAG f2d2dcb
Environment
- HW you use: NVIDIA CUDA BACKEND, NVIDIA GeForce GT 1030 6.1 [CUDA 12.2]
- OS name and version: Ubuntu 22.04.3 LTS (GNU/Linux 6.2.0-32-generic x86_64)
- Compiler version: Intel(R) oneAPI DPC++/C++ Compiler 2024.2.0 (2024.2.0.20240602), cuda version 12.2
Example
Using example from https://www.intel.com/content/www/us/en/docs/onemkl/code-samples-lapack/2022-1/cgesvd-example-c.html
test.cpp
#include <sycl/sycl.hpp>
#include <vector>
#include <iostream>
#include <string>
#include "oneapi/mkl.hpp"
using namespace sycl;
/* Parameters */
#define M 3
#define N 4
#define LDA M
#define LDU M
#define LDVT N
typedef std::complex<float> fcomplex;
namespace mkl_lapack = oneapi::mkl::lapack;
/* Auxiliary routine: printing a matrix */
void print_matrix_u(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
int i, j;
printf("\n %s\n", desc);
fcomplex a[LDU * M];
q.copy<fcomplex>(_a, a, LDU * M).wait();
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++)
printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
printf("\n");
}
}
/* Auxiliary routine: printing a matrix */
void print_matrix_vt(char* desc, int m, int n, fcomplex* _a, int lda, sycl::queue q) {
int i, j;
printf("\n %s\n", desc);
fcomplex a[LDVT * N];
q.copy<fcomplex>(_a, a, LDVT * N).wait();
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++)
printf(" (%6.2f,%6.2f)", a[i + j * lda].real(), a[i + j * lda].imag());
printf("\n");
}
}
/* Auxiliary routine: printing a real matrix */
void print_rmatrix(char* desc, int m, int n, float* _a, int lda, sycl::queue q) {
int i, j;
printf("\n %s\n", desc);
float a[M];
q.copy<float>(_a, a, M).wait();
for (i = 0; i < m; i++) {
for (j = 0; j < n; j++) printf(" %6.2f", a[i + j * lda]);
printf("\n");
}
}
/* Main program */
int main() {
/* Locals */
int m = M, n = N, lda = LDA, ldu = LDU, ldvt = LDVT;
sycl::queue q;
std::cout << "Device name is: " << q.get_device().get_info<sycl::info::device::name>() << std::endl;
float* s = sycl::malloc_device<float>(M, q);
fcomplex* u = sycl::malloc_device<fcomplex>(LDU * M, q);
fcomplex* vt = sycl::malloc_device<fcomplex>(LDVT * N, q);
fcomplex a_host[LDA * N] = {
{ 5.91f, -5.69f}, {-3.15f, -4.08f}, {-4.89f, 4.20f},
{ 7.09f, 2.72f}, {-1.89f, 3.27f}, { 4.10f, -6.70f},
{ 7.78f, -4.06f}, { 4.57f, -2.07f}, { 3.28f, -3.84f},
{-0.79f, -7.21f}, {-3.88f, -3.30f}, { 3.84f, 1.19f}
};
fcomplex* a = sycl::malloc_device<fcomplex>(LDA * N, q);
const sycl::event& copy_a = q.copy<fcomplex>(a_host, a, LDA * N);
const std::int64_t scratchpad_size = 10240;
fcomplex* scratchpad = sycl::malloc_device<fcomplex>(scratchpad_size, q);
mkl_lapack::gesvd(q, oneapi::mkl::jobsvd::vectors, oneapi::mkl::jobsvd::vectors, m, n, a, lda, s, u, ldu, vt, ldvt, scratchpad, scratchpad_size, { copy_a }).wait();
printf(" GESVD Example Program Results\n");
print_rmatrix("Singular values", 1, m, s, 1, q);
print_matrix_u("Left singular vectors (stored columnwise)", m, m, u, ldu, q);
print_matrix_vt("Right singular vectors (stored rowwise)", m, n, vt, ldvt, q);
sycl::free(s, q);
sycl::free(u, q);
sycl::free(vt, q);
sycl::free(a, q);
exit(0);
}
icpx -fsycl -fsycl-targets=nvptx64-nvidia-cuda test.cpp libonemkl.so.0 libonemkl_lapack_cusolver.so.0
Observed behavior
GESVD Example Program Results
Singular values
0.00 0.00 0.00
Left singular vectors (stored columnwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
Right singular vectors (stored rowwise)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00) ( 0.00, 0.00)
Expected behavior
Expected out:
GESVD Example Program Results
Singular values
17.63 11.61 6.78
Left singular vectors(stored columnwise)
(-0.86, -0.00) (0.40, -0.00) (0.32, 0.00)
(-0.35, 0.13) (-0.24, -0.21) (-0.63, 0.60)
(0.15, 0.32) (0.61, 0.61) (-0.36, 0.10)
Right singular vectors(stored rowwise)
(-0.22, 0.51) (-0.37, -0.32) (-0.53, 0.11) (0.15, 0.38)
(0.31, 0.31) (0.09, -0.57) (0.18, -0.39) (0.38, -0.39)
(0.53, 0.24) (0.49, 0.28) (-0.47, -0.25) (-0.15, 0.19)