Permalink
Browse files

* Renamed viennacl::linalg::single_threaded to viennacl::linalg::host_based

* Reduced execution time of sparse-test by speeding up the reference uBLAS calculations (double-transpose-trick)
  • Loading branch information...
karlrupp committed Nov 29, 2012
1 parent 11b7e84 commit 9cf4f1f2f5e95f93fdaa378349bd31ebe7c98766
View
@@ -15,7 +15,7 @@ rm viennacl/linalg/*.*~
rm viennacl/linalg/kernels/*.*~
rm viennacl/linalg/backend/*.*~
rm viennacl/linalg/opencl/*.*~
rm viennacl/linalg/single_threaded/*.*~
rm viennacl/linalg/host_based/*.*~
rm viennacl/linalg/cuda/*.*~
rm viennacl/linalg/detail/amg/*.*~
rm viennacl/linalg/detail/spai/*.*~
View
@@ -394,11 +394,12 @@ int test(Epsilon const& epsilon)
//
// Triangular solvers for A \ b:
//
/*
ublas::compressed_matrix<NumericT> ublas_matrix_trans = trans(ublas_matrix); //note: triangular solvers with uBLAS show atrocious performance, while transposed solvers are quite okay. To keep execution times short, we use a double-transpose-trick in the following.
std::cout << "Testing unit upper triangular solve: compressed_matrix" << std::endl;
result = rhs;
viennacl::copy(result, vcl_result);
boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::unit_upper_tag());
boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_upper_tag());
viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_upper_tag());
if( std::fabs(diff(result, vcl_result)) > epsilon )
@@ -411,21 +412,20 @@ int test(Epsilon const& epsilon)
std::cout << "Testing upper triangular solve: compressed_matrix" << std::endl;
result = rhs;
viennacl::copy(result, vcl_result);
boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::upper_tag());
boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::upper_tag());
viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::upper_tag());
if( std::fabs(diff(result, vcl_result)) > epsilon )
{
std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl;
std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl;
retval = EXIT_FAILURE;
} */
}
std::cout << "Testing unit lower triangular solve: compressed_matrix" << std::endl;
result = rhs;
viennacl::copy(result, vcl_result);
boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::unit_lower_tag());
boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::unit_lower_tag());
viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::unit_lower_tag());
/*std::list< viennacl::backend::mem_handle > multifrontal_L_row_index_arrays_;
@@ -464,11 +464,11 @@ int test(Epsilon const& epsilon)
}
std::cout << "Testing upper triangular solve: compressed_matrix" << std::endl;
std::cout << "Testing lower triangular solve: compressed_matrix" << std::endl;
result = rhs;
viennacl::copy(result, vcl_result);
boost::numeric::ublas::inplace_solve(ublas_matrix, result, boost::numeric::ublas::upper_tag());
viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::upper_tag());
boost::numeric::ublas::inplace_solve(trans(ublas_matrix_trans), result, boost::numeric::ublas::lower_tag());
viennacl::linalg::inplace_solve(vcl_compressed_matrix, vcl_result, viennacl::linalg::lower_tag());
/*std::list< viennacl::backend::mem_handle > multifrontal_U_row_index_arrays_;
std::list< viennacl::backend::mem_handle > multifrontal_U_row_buffers_;
@@ -497,7 +497,7 @@ int test(Epsilon const& epsilon)
if( std::fabs(diff(result, vcl_result)) > epsilon )
{
std::cout << "# Error at operation: upper triangular solve with compressed_matrix" << std::endl;
std::cout << "# Error at operation: lower triangular solve with compressed_matrix" << std::endl;
std::cout << " diff: " << std::fabs(diff(result, vcl_result)) << std::endl;
retval = EXIT_FAILURE;
}
@@ -17,7 +17,7 @@
License: MIT (X11), see file LICENSE in the base directory
============================================================================= */
/** @file circulant_matrix_operations.hpp
/** @file viennacl/linalg/circulant_matrix_operations.hpp
@brief Implementations of operations using circulant_matrix
*/
@@ -73,13 +73,6 @@ namespace viennacl
std::size_t num_rows
)
{
ScalarType * vec_buf = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(vec.handle());
unsigned int const * elim_row_index = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(row_index_array);
unsigned int const * elim_row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(row_buffer);
unsigned int const * elim_col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(col_buffer);
ScalarType const * elim_elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(element_buffer);
level_scheduling_substitute_kernel<<<128, 128>>>(detail::cuda_arg<unsigned int>(row_index_array.cuda_handle()),
detail::cuda_arg<unsigned int>(row_buffer.cuda_handle()),
detail::cuda_arg<unsigned int>(col_buffer.cuda_handle()),
@@ -29,7 +29,6 @@
#include "viennacl/meta/enable_if.hpp"
#include "viennacl/traits/size.hpp"
#include "viennacl/traits/start.hpp"
#include "viennacl/linalg/single_threaded/common.hpp"
#include "viennacl/traits/stride.hpp"
namespace viennacl
@@ -88,13 +88,13 @@ namespace viennacl
assert( (A.handle2().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
assert( (A.handle().get_active_handle_id() == viennacl::MAIN_MEMORY) && bool("System matrix must reside in main memory for ILU0") );
ScalarType const * A_elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(A.handle());
unsigned int const * A_row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(A.handle1());
unsigned int const * A_col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(A.handle2());
ScalarType const * A_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(A.handle());
unsigned int const * A_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle1());
unsigned int const * A_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(A.handle2());
ScalarType * output_elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(diagonal_block_A.handle());
unsigned int * output_row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
unsigned int * output_col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
ScalarType * output_elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(diagonal_block_A.handle());
unsigned int * output_row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle1());
unsigned int * output_col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(diagonal_block_A.handle2());
std::size_t output_counter = 0;
for (std::size_t row = start_index; row < stop_index; ++row)
@@ -179,12 +179,12 @@ namespace viennacl
{
detail::ilu_vector_range<VectorType, ScalarType> vec_range(vec, block_indices_[i].first, LU_blocks[i].size2());
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
}
}
@@ -198,7 +198,7 @@ namespace viennacl
viennacl::copy(A, mat);
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(mat.handle1());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for
@@ -328,36 +328,36 @@ namespace viennacl
viennacl::memory_types old_memory_location = viennacl::memory_domain(vec);
viennacl::switch_memory_domain(vec, viennacl::MAIN_MEMORY);
ScalarType * vector_entries = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(vec);
ScalarType * vector_entries = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(vec);
for (std::size_t i=0; i<block_indices_.size(); ++i)
{
detail::ilu_vector_range<ScalarType *, ScalarType> vec_range(vector_entries, block_indices_[i].first, LU_blocks[i].size2());
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
}
viennacl::switch_memory_domain(vec, old_memory_location);
}
else //apply directly:
{
ScalarType * vector_entries = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(vec);
ScalarType * vector_entries = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(vec);
for (std::size_t i=0; i<block_indices_.size(); ++i)
{
detail::ilu_vector_range<ScalarType *, ScalarType> vec_range(vector_entries, block_indices_[i].first, LU_blocks[i].size2());
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle1());
unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU_blocks[i].handle2());
ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU_blocks[i].handle());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::single_threaded::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), unit_lower_tag());
viennacl::linalg::host_based::detail::csr_inplace_solve<ScalarType>(row_buffer, col_buffer, elements, vec_range, LU_blocks[i].size2(), upper_tag());
}
}
@@ -376,7 +376,7 @@ namespace viennacl
viennacl::copy(A, temp);
viennacl::copy(temp, mat);
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(mat.handle1());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(mat.handle1());
#ifdef VIENNACL_WITH_OPENMP
#pragma omp parallel for
@@ -432,9 +432,9 @@ namespace viennacl
{
MatrixType const & current_block = LU_blocks[block_index];
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(current_block.handle1());
unsigned int const * col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(current_block.handle2());
ScalarType const * elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(current_block.handle());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle1());
unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(current_block.handle2());
ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(current_block.handle());
std::size_t block_start = block_indices_[block_index].first;
@@ -31,7 +31,7 @@
#include "viennacl/tools/tools.hpp"
#include "viennacl/backend/memory.hpp"
#include "viennacl/linalg/single_threaded/common.hpp"
#include "viennacl/linalg/host_based/common.hpp"
#include "viennacl/linalg/misc_operations.hpp"
namespace viennacl
@@ -56,10 +56,10 @@ namespace viennacl
std::list< std::size_t > & row_elimination_num_list,
bool setup_U)
{
ScalarType const * diagonal_buf = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(diagonal_LU.handle());
ScalarType const * elements = viennacl::linalg::single_threaded::detail::extract_raw_pointer<ScalarType>(LU.handle());
unsigned int const * row_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU.handle1());
unsigned int const * col_buffer = viennacl::linalg::single_threaded::detail::extract_raw_pointer<unsigned int>(LU.handle2());
ScalarType const * diagonal_buf = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(diagonal_LU.handle());
ScalarType const * elements = viennacl::linalg::host_based::detail::extract_raw_pointer<ScalarType>(LU.handle());
unsigned int const * row_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle1());
unsigned int const * col_buffer = viennacl::linalg::host_based::detail::extract_raw_pointer<unsigned int>(LU.handle2());
//
// Step 1: Determine row elimination order for each row and build up meta information about the number of entries taking part in each elimination step:
Oops, something went wrong.

0 comments on commit 9cf4f1f

Please sign in to comment.