Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Debugging on Windows and cross-device #151

Merged
merged 13 commits into from
Feb 14, 2019
Merged
37 changes: 21 additions & 16 deletions CMakeLists.txt
Expand Up @@ -28,16 +28,19 @@ add_library (qrack STATIC
src/qunit.cpp
)

# Libraries linked into every Qrack executable (tests, benchmarks, examples).
# The qrack library itself is always linked; pthread is added for every
# toolchain except MSVC.
set(QRACK_LIBS qrack)
if (NOT MSVC)
    list(APPEND QRACK_LIBS pthread)
endif ()

# Declare the unittest executable
add_executable (unittest
test/test_main.cpp
test/tests.cpp
)

target_link_libraries (unittest
qrack
pthread
)
target_link_libraries (unittest ${QRACK_LIBS})

add_test (NAME qrack_tests
COMMAND unittest
Expand All @@ -49,10 +52,7 @@ add_executable (benchmarks
test/benchmarks.cpp
)

target_link_libraries (benchmarks
qrack
pthread
)
target_link_libraries (benchmarks ${QRACK_LIBS})

add_test (NAME qrack_benchmarks
COMMAND benchmarks
Expand All @@ -64,10 +64,7 @@ add_executable (accuracy
test/accuracy.cpp
)

target_link_libraries (accuracy
qrack
pthread
)
target_link_libraries (accuracy ${QRACK_LIBS})

add_test (NAME qrack_accuracy
COMMAND accuracy
Expand All @@ -85,8 +82,16 @@ message ("Pure 32-bit compilation is: ${ENABLE_PURE32}")
message ("Single accuracy is: ${ENABLE_COMPLEX8}")
message ("Complex_x2/AVX Support is: ${ENABLE_COMPLEX_X2}")

# Base compiler flags for the library (QRACK_COMPILE_OPTS) and for the
# test/example executables (TEST_COMPILE_OPTS).
# Non-MSVC toolchains get optimization and warnings-as-errors (plus -fPIC for
# the library); MSVC builds only get the language-standard and warning flags.
if (NOT MSVC)
    set(QRACK_COMPILE_OPTS -O3 -std=c++11 -Wall -Werror -fPIC)
    set(TEST_COMPILE_OPTS -O3 -std=c++11 -Wall -Werror)
else ()
    set(QRACK_COMPILE_OPTS -std=c++11 -Wall)
    set(TEST_COMPILE_OPTS -std=c++11 -Wall)
endif ()

# Add AVX code generation when the Complex_x2 path is enabled and single
# accuracy (ENABLE_COMPLEX8) is not.
# The flag is appended as its own list element: overwriting the variable would
# drop the base flags set earlier, and building a quoted "... -mavx" string
# would glue -mavx onto the previous option so both reach the compiler as a
# single, invalid argument.
if (ENABLE_COMPLEX_X2 AND NOT ENABLE_COMPLEX8)
    list(APPEND QRACK_COMPILE_OPTS -mavx)
endif ()

configure_file(include/common/config.h.in include/common/config.h @ONLY)
Expand All @@ -103,9 +108,9 @@ if (APPLE)
set(TEST_COMPILE_OPTS -Wno-inconsistent-missing-override)
endif (APPLE)

target_compile_options (qrack PUBLIC -O3 -std=c++11 -Wall -Werror -fPIC ${QRACK_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (unittest PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (benchmarks PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (qrack PUBLIC ${QRACK_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (unittest PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (benchmarks PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)

set_target_properties (qrack PROPERTIES
VERSION ${PROJECT_VERSION}
Expand Down
16 changes: 16 additions & 0 deletions README.md
Expand Up @@ -67,6 +67,22 @@ While the OpenCL framework is available by default on most modern Macs, the C++

https://www.khronos.org/registry/OpenCL/

## Building and Installing Qrack on Windows

Qrack supports building on Windows, but some special configuration is required. Windows 10 usually comes with default OpenCL libraries for Intel (or AMD) CPUs and their graphics coprocessors, but NVIDIA graphics card support might require the CUDA Toolkit. The CUDA Toolkit also provides an OpenCL development environment, which is generally necessary to build Qrack.

Qrack requires the `xxd` command to convert its OpenCL kernel code into hexadecimal format for building. `xxd` is not natively available on Windows systems, but Windows executables for it are provided by sources including the [Vim editor Windows port](https://www.vim.org/download.php).

By default, CMake on Windows sets up a 32-bit Visual Studio project (if using Visual Studio). Putting together all of the above considerations, after installing the CUDA Toolkit and Vim, a typical CMake command for Windows might look like this:

```
$ mkdir _build
$ cd _build
$ cmake -DCMAKE_GENERATOR_PLATFORM=x64 -DXXD_BIN="C:/Program Files (x86)/Vim/vim81/xxd.exe" ..
```

After running CMake, the project must be built in Visual Studio.

## Performing code coverage

```
Expand Down
22 changes: 6 additions & 16 deletions cmake/Examples.cmake
Expand Up @@ -4,34 +4,24 @@ add_executable (grovers

set_target_properties(grovers PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (grovers
qrack
pthread
)
target_link_libraries (grovers ${QRACK_LIBS})

add_executable (grovers_lookup
examples/grovers_lookup.cpp
)

set_target_properties(grovers_lookup PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (grovers_lookup
qrack
pthread
)
target_link_libraries (grovers_lookup ${QRACK_LIBS})

add_executable (ordered_list_search
examples/ordered_list_search.cpp
)

set_target_properties(ordered_list_search PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/examples")

target_link_libraries (ordered_list_search
qrack
pthread
)

target_link_libraries (ordered_list_search ${QRACK_LIBS})

target_compile_options (grovers PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers_lookup PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (ordered_list_search PUBLIC -O3 -std=c++11 -Wall -Werror ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (grovers_lookup PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
target_compile_options (ordered_list_search PUBLIC ${TEST_COMPILE_OPTS} -DCATCH_CONFIG_FAST_COMPILE)
32 changes: 16 additions & 16 deletions cmake/OpenCL.cmake
Expand Up @@ -3,44 +3,44 @@ option (ENABLE_OPENCL "Use OpenCL optimizations" ON)
set (OPENCL_AMDSDK /opt/AMDAPPSDK-3.0 CACHE PATH "Installation path for the installed AMD OpenCL SDK, if used")

# Options used when building the project
find_library (LIB_OPENCL OpenCL)
if (NOT LIB_OPENCL)
find_package (OpenCL)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't work if you manually installed the SDK (as I had to do on VMWare), as well as requiring a PACKAGE_CMake.txt file to exist in the search path.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Restored.

if (NOT OpenCL_FOUND)
# Attempt with AMD's OpenCL SDK
find_library (LIB_OPENCL OpenCL PATHS ${OPENCL_AMDSDK}/lib/x86_64/)
if (NOT LIB_OPENCL)
set (ENABLE_OPENCL OFF)
else ()
# Found, set the required include path.
set (OPENCL_INCLUDE_PATH ${OPENCL_AMDSDK}/include CACHE PATH "AMD OpenCL SDK Header include path")
set (OPENCL_COMPILATION_OPTIONS
set (OpenCL_INCLUDE_DIRS ${OPENCL_AMDSDK}/include CACHE PATH "AMD OpenCL SDK Header include path")
set (OpenCL_COMPILATION_OPTIONS
-Wno-ignored-attributes
-Wno-deprecated-declarations
CACHE STRING "AMD OpenCL SDK Compilation Option Requirements")
message ("OpenCL support found in the AMD SDK")
endif ()
endif()
endif ()

message ("OpenCL Support is: ${ENABLE_OPENCL}")

if (ENABLE_OPENCL)
message (" libOpenCL: ${LIB_OPENCL}")
message (" Includes: ${OPENCL_INCLUDE_PATH}")
message (" Options: ${OPENCL_COMPILATION_OPTIONS}")
message (" libOpenCL: ${OpenCL_LIBRARIES}")
message (" Includes: ${OpenCL_INCLUDE_DIRS}")
message (" Options: ${OpenCL_COMPILATION_OPTIONS}")
endif ()

if (ENABLE_OPENCL)
target_compile_definitions (qrack PUBLIC CL_HPP_TARGET_OPENCL_VERSION=200)
target_compile_definitions (qrack PUBLIC CL_HPP_MINIMUM_OPENCL_VERSION=100)

# Include the necessary options and libraries to link against
target_include_directories (qrack PUBLIC ${PROJECT_BINARY_DIR} ${OPENCL_INCLUDE_PATH})
target_compile_options (qrack PUBLIC ${OPENCL_COMPILATION_OPTIONS})
target_link_libraries (unittest ${LIB_OPENCL})
target_link_libraries (benchmarks ${LIB_OPENCL})
target_link_libraries (accuracy ${LIB_OPENCL})
target_link_libraries (grovers ${LIB_OPENCL})
target_link_libraries (grovers_lookup ${LIB_OPENCL})
target_link_libraries (ordered_list_search ${LIB_OPENCL})
target_include_directories (qrack PUBLIC ${PROJECT_BINARY_DIR} ${OpenCL_INCLUDE_DIRS})
target_compile_options (qrack PUBLIC ${OpenCL_COMPILATION_OPTIONS})
target_link_libraries (unittest ${OpenCL_LIBRARIES})
target_link_libraries (benchmarks ${OpenCL_LIBRARIES})
target_link_libraries (accuracy ${OpenCL_LIBRARIES})
target_link_libraries (grovers ${OpenCL_LIBRARIES})
target_link_libraries (grovers_lookup ${OpenCL_LIBRARIES})
target_link_libraries (ordered_list_search ${OpenCL_LIBRARIES})


# Build the OpenCL command files
Expand Down
11 changes: 9 additions & 2 deletions include/common/complex16simd.hpp
Expand Up @@ -13,11 +13,14 @@
#pragma once

#include <cmath>
#if defined(_WIN32)
#include <intrin.h>
#else
#include <emmintrin.h>

#if ENABLE_AVX
#include <smmintrin.h>
#endif
#endif

namespace Qrack {

Expand Down Expand Up @@ -85,7 +88,11 @@ struct Complex16Simd {
_val = _mm_div_pd(_val, _mm_set1_pd(rhs));
return _val;
}
inline Complex16Simd operator-() const { return -_val; }
/// Unary negation: returns a copy of this value with every packed double
/// lane multiplied by -1.
inline Complex16Simd operator-() const
{
    // The factor must be -1.0; a factor of +1.0 would return the value unchanged.
    const __m128d negOne = _mm_set1_pd(-1.0);
    return _mm_mul_pd(negOne, _val);
}
inline Complex16Simd operator*=(const double& other)
{
_val = _mm_mul_pd(_val, _mm_set1_pd(other));
Expand Down
10 changes: 9 additions & 1 deletion include/common/complex16x2simd.hpp
Expand Up @@ -12,9 +12,13 @@

#pragma once

#if defined(_WIN32)
#include <intrin.h>
#else
#include <emmintrin.h>
#include <immintrin.h>
#include <smmintrin.h>
#endif

namespace Qrack {

Expand Down Expand Up @@ -54,7 +58,11 @@ struct Complex16x2Simd {
return _val2;
}
inline Complex16x2Simd operator*(const double rhs) const { return _mm256_mul_pd(_val2, _mm256_set1_pd(rhs)); }
inline Complex16x2Simd operator-() const { return -_val2; }
/// Unary negation: returns a copy of this value with every packed double
/// lane multiplied by -1.
inline Complex16x2Simd operator-() const
{
    // The factor must be -1.0; a factor of +1.0 would return the value unchanged.
    const __m256d negOne = _mm256_set1_pd(-1.0);
    return _mm256_mul_pd(negOne, _val2);
}
inline Complex16x2Simd operator*=(const double& other)
{
_val2 = _mm256_mul_pd(_val2, _mm256_set1_pd(other));
Expand Down
10 changes: 9 additions & 1 deletion include/common/complex8x2simd.hpp
Expand Up @@ -12,7 +12,11 @@

#pragma once

#if defined(_WIN32)
#include <intrin.h>
#else
#include <xmmintrin.h>
#endif

namespace Qrack {

Expand Down Expand Up @@ -55,7 +59,11 @@ struct Complex8x2Simd {
return _val2;
}
inline Complex8x2Simd operator*(const float rhs) const { return _mm_mul_ps(_val2, _mm_set1_ps(rhs)); }
inline Complex8x2Simd operator-() const { return -_val2; }
/// Unary negation: returns a copy of this value with every packed float
/// lane multiplied by -1.
inline Complex8x2Simd operator-() const
{
    // Broadcast -1.0f to all lanes and multiply lane-wise.
    return _mm_mul_ps(_mm_set1_ps(-1.0f), _val2);
}
inline Complex8x2Simd operator*=(const float& other)
{
_val2 = _mm_mul_ps(_val2, _mm_set1_ps(other));
Expand Down
4 changes: 3 additions & 1 deletion include/common/oclengine.hpp
Expand Up @@ -22,9 +22,11 @@
#include <memory>
#include <mutex>

#ifdef __APPLE__
#if defined(__APPLE__)
#define CL_SILENCE_DEPRECATION
#include <OpenCL/cl.hpp>
#elif defined(_WIN32)
#include <CL/cl.hpp>
#else
#include <CL/cl2.hpp>
#endif
Expand Down
5 changes: 5 additions & 0 deletions include/qengine_cpu.hpp
Expand Up @@ -171,8 +171,13 @@ class QEngineCPU : public QEngine, public ParallelFor {
/// Release the state vector buffer, if one is held, and reset the pointer
/// to NULL so a repeated call is harmless.
virtual void FreeStateVec()
{
    if (stateVec != NULL) {
#ifdef _WIN32
        // Windows builds release the buffer with _aligned_free()
        // (presumably it was obtained from _aligned_malloc() -- confirm at the allocation site).
        _aligned_free(stateVec);
#else
        free(stateVec);
#endif
        stateVec = NULL;
    }
}

virtual void DecomposeDispose(bitLenInt start, bitLenInt length, QEngineCPUPtr dest);
Expand Down
9 changes: 6 additions & 3 deletions include/qengine_opencl.hpp
Expand Up @@ -101,9 +101,7 @@ class QEngineOCL : public QEngine {

FreeStateVec();

if (nrmArray) {
free(nrmArray);
}
FreeAligned(nrmArray);
}

virtual void SetQubitCount(bitLenInt qb);
Expand Down Expand Up @@ -209,8 +207,13 @@ class QEngineOCL : public QEngine {
virtual void FreeStateVec()
{
if (stateVec) {
#if defined(_WIN32)
_aligned_free(stateVec);
#else
free(stateVec);
#endif
}
stateVec = NULL;
}
virtual BufferPtr MakeStateVecBuffer(complex* nStateVec);

Expand Down
13 changes: 13 additions & 0 deletions include/qinterface.hpp
Expand Up @@ -12,6 +12,8 @@

#pragma once

#define _USE_MATH_DEFINES

#include <ctime>
#include <map>
#include <math.h>
Expand Down Expand Up @@ -117,6 +119,17 @@ class QInterface {

template <typename GateFunc> void ControlledLoopFixture(bitLenInt length, GateFunc gate);

/// Release a buffer using the platform-appropriate deallocator.
/// Does nothing when toFree is null. The caller's pointer is not modified,
/// so callers that reuse the pointer must reset it themselves.
void FreeAligned(void* toFree)
{
    if (!toFree) {
        return;
    }
#if defined(_WIN32)
    // Windows builds release the buffer with _aligned_free().
    _aligned_free(toFree);
#else
    free(toFree);
#endif
}

public:
QInterface(bitLenInt n, qrack_rand_gen_ptr rgp = nullptr, bool doNorm = true)
: rand_distribution(0.0, 1.0)
Expand Down
12 changes: 11 additions & 1 deletion src/common/parallel_for.cpp
Expand Up @@ -10,6 +10,8 @@
// See LICENSE.md in the project root or https://www.gnu.org/licenses/lgpl-3.0.en.html
// for details.

#define _USE_MATH_DEFINES

#include <atomic>
#include <future>
#include <math.h>
Expand Down Expand Up @@ -136,7 +138,10 @@ void ParallelFor::par_for_mask(
}

/* Pre-calculate the masks to simplify the increment function later. */
bitCapInt masks[maskLen][2];
bitCapInt** masks = new bitCapInt*[maskLen];
for (int i = 0; i < maskLen; i++) {
masks[i] = new bitCapInt[2];
}

bool onlyLow = true;
for (int i = 0; i < maskLen; i++) {
Expand All @@ -161,6 +166,11 @@ void ParallelFor::par_for_mask(

par_for_inc(begin, (end - begin) >> maskLen, incFn, fn);
}

for (int i = 0; i < maskLen; i++) {
delete[] masks[i];
}
delete[] masks;
}

real1 ParallelFor::par_norm(const bitCapInt maxQPower, const complex* stateArray)
Expand Down