Skip to content

Commit

Permalink
Debugging on Windows and cross-device (#151)
Browse files Browse the repository at this point in the history
* _aligned_malloc for Windows

* Library compiles but cannot find pthread.lib

* OpenCL builds if xxd.exe provided, runs some unit tests

* defined(_WIN32) && !defined(__CYGWIN__)

* Reverse _aligned_malloc argument order

* Cross-device ops debugging

* Debugging cross-device decompose

* Debugging cross-device ApproxCompare()

* Update README.md

* CMake cleanup and make format

* CMake OpenCL for VMWare support

* Fixing double build
  • Loading branch information
WrathfulSpatula committed Feb 14, 2019
1 parent 5b0bd1e commit f1fd99c
Show file tree
Hide file tree
Showing 16 changed files with 226 additions and 134 deletions.
37 changes: 21 additions & 16 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,19 @@ add_library (qrack STATIC
src/qunit.cpp
)

# Libraries linked into every executable: qrack itself, plus pthread on every
# toolchain except MSVC.
set(QRACK_LIBS qrack)
if (NOT MSVC)
    set(QRACK_LIBS ${QRACK_LIBS} pthread)
endif ()

# Declare the unittest executable
add_executable (unittest
test/test_main.cpp
test/tests.cpp
)

target_link_libraries (unittest
qrack
pthread
)
target_link_libraries (unittest ${QRACK_LIBS})

add_test (NAME qrack_tests
COMMAND unittest
Expand All @@ -49,10 +52,7 @@ add_executable (benchmarks
test/benchmarks.cpp
)

target_link_libraries (benchmarks
qrack
pthread
)
target_link_libraries (benchmarks ${QRACK_LIBS})

add_test (NAME qrack_benchmarks
COMMAND benchmarks
Expand All @@ -64,10 +64,7 @@ add_executable (accuracy
test/accuracy.cpp
)

target_link_libraries (accuracy
qrack
pthread
)
target_link_libraries (accuracy ${QRACK_LIBS})

add_test (NAME qrack_accuracy
COMMAND accuracy
Expand All @@ -85,8 +82,16 @@ message ("Pure 32-bit compilation is: ${ENABLE_PURE32}")
message ("Single accuracy is: ${ENABLE_COMPLEX8}")
message ("Complex_x2/AVX Support is: ${ENABLE_COMPLEX_X2}")

# Base compiler flags: QRACK_COMPILE_OPTS is applied to the qrack library,
# TEST_COMPILE_OPTS to the test executables. Optimization, -Werror and -fPIC
# are only requested when the compiler is not MSVC.
if (NOT MSVC)
    set(QRACK_COMPILE_OPTS -O3 -std=c++11 -Wall -Werror -fPIC)
    set(TEST_COMPILE_OPTS -O3 -std=c++11 -Wall -Werror)
else ()
    set(QRACK_COMPILE_OPTS -std=c++11 -Wall)
    set(TEST_COMPILE_OPTS -std=c++11 -Wall)
endif ()

if (ENABLE_COMPLEX_X2 AND NOT ENABLE_COMPLEX8)
set(QRACK_COMPILE_OPTS -mavx)
set(QRACK_COMPILE_OPTS ${QRACK_COMPILE_OPTS} -mavx)
endif (ENABLE_COMPLEX_X2 AND NOT ENABLE_COMPLEX8)

configure_file(include/common/config.h.in include/common/config.h @ONLY)
Expand All @@ -103,9 +108,9 @@ if (APPLE)
# Append to TEST_COMPILE_OPTS instead of overwriting it: the variable already
# carries the base flags (-std=c++11, -Wall, ...) assigned earlier in this file,
# and replacing it here would silently drop them for every test target on Apple.
list(APPEND TEST_COMPILE_OPTS -Wno-inconsistent-missing-override)
endif (APPLE)

target_compile_options (qrack PUBLIC -O3 -std=c++11 -Wall -Werror -fPIC ${QRACK_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (unittest PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (benchmarks PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (qrack PUBLIC ${QRACK_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (unittest PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (benchmarks PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)

set_target_properties (qrack PROPERTIES
VERSION ${PROJECT_VERSION}
Expand Down
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,22 @@ While the OpenCL framework is available by default on most modern Macs, the C++

https://www.khronos.org/registry/OpenCL/

## Building and Installing Qrack on Windows

Qrack supports building on Windows, but some special configuration is required. Windows 10 usually comes with default OpenCL libraries for Intel (or AMD) CPUs and their graphics coprocessors, but NVIDIA graphics card support might require the CUDA Toolkit. The CUDA Toolkit also provides an OpenCL development environment, which is generally necessary to build Qrack.

Qrack requires the `xxd` command to convert its OpenCL kernel code into hexadecimal format for building. `xxd` is not natively available on Windows systems, but Windows executables for it are provided by sources including the [Vim editor Windows port](https://www.vim.org/download.php).

By default, CMake on Windows sets up a 32-bit Visual Studio project (if using Visual Studio). Putting together all of the above considerations, after installing the CUDA Toolkit and Vim, a typical CMake command for Windows might look like this:

```
$ mkdir _build
$ cd _build
$ cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DXXD_BIN="C:/Program Files (x86)/Vim/vim81/xxd.exe" ..
```

After CMake, the project must be built in Visual Studio.

## Performing code coverage

```
Expand Down
22 changes: 6 additions & 16 deletions cmake/Examples.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -4,34 +4,24 @@ add_executable (grovers

set_target_properties(grovers PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (grovers
qrack
pthread
)
target_link_libraries (grovers ${QRACK_LIBS})

add_executable (grovers_lookup
examples/grovers_lookup.cpp
)

set_target_properties(grovers_lookup PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (grovers_lookup
qrack
pthread
)
target_link_libraries (grovers_lookup ${QRACK_LIBS})

add_executable (ordered_list_search
examples/ordered_list_search.cpp
)

set_target_properties(ordered_list_search PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (ordered_list_search
qrack
pthread
)

target_link_libraries (ordered_list_search ${QRACK_LIBS})

target_compile_options (grovers PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers_lookup PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (ordered_list_search PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers_lookup PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (ordered_list_search PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
32 changes: 16 additions & 16 deletions cmake/OpenCL.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,44 +3,44 @@ option (ENABLE_OPENCL "Use OpenCL optimizations" ON)
set (OPENCL_AMDSDK /opt/AMDAPPSDK-3.0 CACHE PATH "Installation path for the installed AMD OpenCL SDK, if used")

# Options used when building the project
find_library (LIB_OPENCL OpenCL)
if (NOT LIB_OPENCL)
find_package (OpenCL)
if (NOT OpenCL_FOUND)
# Attempt with AMD's OpenCL SDK
find_library (LIB_OPENCL OpenCL PATHS ${OPENCL_AMDSDK}/lib/x86_64/)
if (NOT LIB_OPENCL)
set (ENABLE_OPENCL OFF)
else ()
# Found, set the required include path.
set (OPENCL_INCLUDE_PATH ${OPENCL_AMDSDK}/include CACHE PATH "AMD OpenCL SDK Header include path")
set (OPENCL_COMPILATION_OPTIONS
set (OpenCL_INCLUDE_DIRS ${OPENCL_AMDSDK}/include CACHE PATH "AMD OpenCL SDK Header include path")
# find_package (OpenCL) did not succeed on this path, so OpenCL_LIBRARIES does not
# name a usable library. Point it at the AMD SDK library located by find_library,
# because the target_link_libraries calls later in this file link ${OpenCL_LIBRARIES}.
set (OpenCL_LIBRARIES ${LIB_OPENCL})
set (OpenCL_COMPILATION_OPTIONS
-Wno-ignored-attributes
-Wno-deprecated-declarations
CACHE STRING "AMD OpenCL SDK Compilation Option Requirements")
message ("OpenCL support found in the AMD SDK")
endif ()
endif()
endif ()

message ("OpenCL Support is: ${ENABLE_OPENCL}")

if (ENABLE_OPENCL)
message (" libOpenCL: ${LIB_OPENCL}")
message (" Includes: ${OPENCL_INCLUDE_PATH}")
message (" Options: ${OPENCL_COMPILATION_OPTIONS}")
message (" libOpenCL: ${OpenCL_LIBRARIES}")
message (" Includes: ${OpenCL_INCLUDE_DIRS}")
message (" Options: ${OpenCL_COMPILATION_OPTIONS}")
endif ()

if (ENABLE_OPENCL)
target_compile_definitions (qrack PUBLIC CL_HPP_TARGET_OPENCL_VERSION=200)
target_compile_definitions (qrack PUBLIC CL_HPP_MINIMUM_OPENCL_VERSION=100)

# Include the necessary options and libraries to link against
target_include_directories (qrack PUBLIC ${PROJECT_BINARY_DIR} ${OPENCL_INCLUDE_PATH})
target_compile_options (qrack PUBLIC ${OPENCL_COMPILATION_OPTIONS})
target_link_libraries (unittest ${LIB_OPENCL})
target_link_libraries (benchmarks ${LIB_OPENCL})
target_link_libraries (accuracy ${LIB_OPENCL})
target_link_libraries (grovers ${LIB_OPENCL})
target_link_libraries (grovers_lookup ${LIB_OPENCL})
target_link_libraries (ordered_list_search ${LIB_OPENCL})
target_include_directories (qrack PUBLIC ${PROJECT_BINARY_DIR} ${OpenCL_INCLUDE_DIRS})
target_compile_options (qrack PUBLIC ${OpenCL_COMPILATION_OPTIONS})
target_link_libraries (unittest ${OpenCL_LIBRARIES})
target_link_libraries (benchmarks ${OpenCL_LIBRARIES})
target_link_libraries (accuracy ${OpenCL_LIBRARIES})
target_link_libraries (grovers ${OpenCL_LIBRARIES})
target_link_libraries (grovers_lookup ${OpenCL_LIBRARIES})
target_link_libraries (ordered_list_search ${OpenCL_LIBRARIES})


# Build the OpenCL command files
Expand Down
11 changes: 9 additions & 2 deletions include/common/complex16simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,14 @@
#pragma once

#include <cmath>
#if defined(_WIN32)
#include <intrin.h>
#else
#include <emmintrin.h>

#if ENABLE_AVX
#include <smmintrin.h>
#endif
#endif

namespace Qrack {

Expand Down Expand Up @@ -85,7 +88,11 @@ struct Complex16Simd {
_val = _mm_div_pd(_val, _mm_set1_pd(rhs));
return _val;
}
inline Complex16Simd operator-() const { return -_val; }
/// Unary negation: returns a value whose packed doubles are the negatives of those held in _val.
inline Complex16Simd operator-() const
{
    // The factor must be -1.0; multiplying by +1.0 would hand back the value unchanged.
    const __m128d negOne = _mm_set1_pd(-1.0);
    return _mm_mul_pd(negOne, _val);
}
inline Complex16Simd operator*=(const double& other)
{
_val = _mm_mul_pd(_val, _mm_set1_pd(other));
Expand Down
10 changes: 9 additions & 1 deletion include/common/complex16x2simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@

#pragma once

#if defined(_WIN32)
#include <intrin.h>
#else
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#endif

namespace Qrack {

Expand Down Expand Up @@ -54,7 +58,11 @@ struct Complex16x2Simd {
return _val2;
}
inline Complex16x2Simd operator*(const double rhs) const { return _mm256_mul_pd(_val2, _mm256_set1_pd(rhs)); }
inline Complex16x2Simd operator-() const { return -_val2; }
/// Unary negation: returns a value whose packed doubles are the negatives of those held in _val2.
inline Complex16x2Simd operator-() const
{
    // The factor must be -1.0; multiplying by +1.0 would hand back the value unchanged.
    const __m256d negOne = _mm256_set1_pd(-1.0);
    return _mm256_mul_pd(negOne, _val2);
}
inline Complex16x2Simd operator*=(const double& other)
{
_val2 = _mm256_mul_pd(_val2, _mm256_set1_pd(other));
Expand Down
10 changes: 9 additions & 1 deletion include/common/complex8x2simd.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,11 @@

#pragma once

#if defined(_WIN32)
#include <intrin.h>
#else
#include <xmmintrin.h>
#endif

namespace Qrack {

Expand Down Expand Up @@ -55,7 +59,11 @@ struct Complex8x2Simd {
return _val2;
}
inline Complex8x2Simd operator*(const float rhs) const { return _mm_mul_ps(_val2, _mm_set1_ps(rhs)); }
inline Complex8x2Simd operator-() const { return -_val2; }
/// Unary negation: every packed float in _val2 is multiplied by -1.0f.
inline Complex8x2Simd operator-() const
{
    return _mm_mul_ps(_mm_set1_ps(-1.0f), _val2);
}
inline Complex8x2Simd operator*=(const float& other)
{
_val2 = _mm_mul_ps(_val2, _mm_set1_ps(other));
Expand Down
4 changes: 3 additions & 1 deletion include/common/oclengine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@
#include <memory>
#include <mutex>

#ifdef __APPLE__
#if defined(__APPLE__)
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.hpp>
#elif defined(_WIN32)
#include <CL/cl.hpp>
#else
#include <CL/cl2.hpp>
#endif
Expand Down
5 changes: 5 additions & 0 deletions include/qengine_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,13 @@ class QEngineCPU : public QEngine, public ParallelFor {
// Releases the buffer that stateVec points to (if any) and resets stateVec to NULL.
virtual void FreeStateVec()
{
#if defined(_WIN32)
    // Windows builds release the buffer with _aligned_free() instead of free().
    if (stateVec != NULL) {
        _aligned_free(stateVec);
    }
#else
    if (stateVec != NULL) {
        free(stateVec);
    }
#endif

    // Always clear the pointer, whether or not anything was released.
    stateVec = NULL;
}

virtual void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUPtr dest);
Expand Down
9 changes: 6 additions & 3 deletions include/qengine_opencl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,9 +101,7 @@ class QEngineOCL : public QEngine {

FreeStateVec();

if (nrmArray) {
free(nrmArray);
}
FreeAligned(nrmArray);
}

virtual void SetQubitCount(bitLenInt qb);
Expand Down Expand Up @@ -209,8 +207,13 @@ class QEngineOCL : public QEngine {
// Releases the buffer that stateVec points to (if any) and resets stateVec to NULL.
virtual void FreeStateVec()
{
    if (stateVec != NULL) {
#if !defined(_WIN32)
        free(stateVec);
#else
        // Windows builds release the buffer with _aligned_free() instead of free().
        _aligned_free(stateVec);
#endif
    }

    // Always clear the pointer, whether or not anything was released.
    stateVec = NULL;
}
virtual BufferPtr MakeStateVecBuffer(complex* nStateVec);

Expand Down
13 changes: 13 additions & 0 deletions include/qinterface.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

#pragma once

#define _USE_MATH_DEFINES

#include <ctime>
#include <map>
#include <math.h>
Expand Down Expand Up @@ -117,6 +119,17 @@ class QInterface {

template <typename GateFunc> void ControlledLoopFixture(bitLenInt length, GateFunc gate);

// Releases toFree with the platform's deallocation call: _aligned_free() when
// _WIN32 is defined, free() otherwise. A null pointer is ignored.
void FreeAligned(void* toFree)
{
    if (!toFree) {
        return;
    }

#if defined(_WIN32)
    _aligned_free(toFree);
#else
    free(toFree);
#endif
}

public:
QInterface(bitLenInt n, qrack_rand_gen_ptr rgp = nullptr, bool doNorm = true)
: rand_distribution(0.0, 1.0)
Expand Down
12 changes: 11 additions & 1 deletion src/common/parallel_for.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
// for details.

#define _USE_MATH_DEFINES

#include <atomic>
#include <future>
#include <math.h>
Expand Down Expand Up @@ -136,7 +138,10 @@ void ParallelFor::par_for_mask(
}

/* Pre-calculate the masks to simplify the increment function later. */
bitCapInt masks[maskLen][2];
bitCapInt** masks = new bitCapInt*[maskLen];
for (int i = 0; i < maskLen; i++) {
masks[i] = new bitCapInt[2];
}

bool onlyLow = true;
for (int i = 0; i < maskLen; i++) {
Expand All @@ -161,6 +166,11 @@ void ParallelFor::par_for_mask(

par_for_inc(begin, (end - begin) >> maskLen, incFn, fn);
}

for (int i = 0; i < maskLen; i++) {
delete[] masks[i];
}
delete[] masks;
}

real1 ParallelFor::par_norm(const bitCapInt maxQPower, const complex* stateArray)
Expand Down

0 comments on commit f1fd99c

Please sign in to comment.